1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * TGSI to LLVM IR translation -- SoA.
32 *
33 * @author Jose Fonseca <[email protected]>
34 *
35 * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell,
36 * Brian Paul, and others.
37 */
38
39 #include "util/detect.h"
40 #include "pipe/p_shader_tokens.h"
41 #include "util/u_debug.h"
42 #include "util/u_math.h"
43 #include "util/u_memory.h"
44 #include "tgsi/tgsi_dump.h"
45 #include "tgsi/tgsi_exec.h"
46 #include "tgsi/tgsi_info.h"
47 #include "tgsi/tgsi_parse.h"
48 #include "tgsi/tgsi_util.h"
49 #include "tgsi/tgsi_scan.h"
50 #include "tgsi/tgsi_strings.h"
51 #include "lp_bld_tgsi_action.h"
52 #include "lp_bld_type.h"
53 #include "lp_bld_const.h"
54 #include "lp_bld_arit.h"
55 #include "lp_bld_bitarit.h"
56 #include "lp_bld_gather.h"
57 #include "lp_bld_init.h"
58 #include "lp_bld_logic.h"
59 #include "lp_bld_misc.h"
60 #include "lp_bld_swizzle.h"
61 #include "lp_bld_flow.h"
62 #include "lp_bld_coro.h"
63 #include "lp_bld_quad.h"
64 #include "lp_bld_tgsi.h"
65 #include "lp_bld_limits.h"
66 #include "lp_bld_debug.h"
67 #include "lp_bld_printf.h"
68 #include "lp_bld_sample.h"
69 #include "lp_bld_struct.h"
70 #include "lp_bld_jit_types.h"
71
72 #define DUMP_GS_EMITS 0
73
74 /*
75 * If non-zero, the generated LLVM IR will print intermediate results on every TGSI
76 * instruction.
77 *
78 * TODO:
79 * - take execution masks in consideration
80 * - debug control-flow instructions
81 */
82 #define DEBUG_EXECUTION 0
83
84
85 /*
86 * Emit code to print a register value.
87 */
88 static void
emit_dump_reg(struct gallivm_state * gallivm,unsigned file,unsigned index,unsigned chan,LLVMValueRef value)89 emit_dump_reg(struct gallivm_state *gallivm,
90 unsigned file,
91 unsigned index,
92 unsigned chan,
93 LLVMValueRef value)
94 {
95 char buf[32];
96
97 snprintf(buf, sizeof buf, " %s[%u].%c = ",
98 tgsi_file_name(file),
99 index, "xyzw"[chan]);
100
101 lp_build_print_value(gallivm, buf, value);
102 }
103
104 static inline struct function_ctx *
func_ctx(struct lp_exec_mask * mask)105 func_ctx(struct lp_exec_mask *mask)
106 {
107 assert(mask->function_stack_size > 0);
108 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
109 return &mask->function_stack[mask->function_stack_size - 1];
110 }
111
112 /*
113 * combine the execution mask if there is one with the current mask.
114 */
115 static LLVMValueRef
mask_vec(struct lp_build_tgsi_context * bld_base)116 mask_vec(struct lp_build_tgsi_context *bld_base)
117 {
118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
119 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
120 struct lp_exec_mask *exec_mask = &bld->exec_mask;
121 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
122 if (!exec_mask->has_mask) {
123 return bld_mask;
124 }
125 if (!bld_mask)
126 return exec_mask->exec_mask;
127 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
128 exec_mask->exec_mask, "");
129 }
130
lp_exec_tgsi_break(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)131 static void lp_exec_tgsi_break(struct lp_exec_mask *mask,
132 struct lp_build_tgsi_context * bld_base)
133 {
134 enum tgsi_opcode opcode =
135 bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
136 bool break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
137 opcode == TGSI_OPCODE_CASE);
138 lp_exec_break(mask, &bld_base->pc, break_always);
139 }
140
lp_exec_switch(struct lp_exec_mask * mask,LLVMValueRef switchval)141 static void lp_exec_switch(struct lp_exec_mask *mask,
142 LLVMValueRef switchval)
143 {
144 struct function_ctx *ctx = func_ctx(mask);
145
146 if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING ||
147 ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
148 ctx->switch_stack_size++;
149 return;
150 }
151
152 ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] =
153 ctx->break_type;
154 ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH;
155
156 ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask;
157 ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val;
158 ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default;
159 ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default;
160 ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc;
161 ctx->switch_stack_size++;
162
163 mask->switch_mask = LLVMConstNull(mask->int_vec_type);
164 ctx->switch_val = switchval;
165 ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type);
166 ctx->switch_in_default = false;
167 ctx->switch_pc = 0;
168
169 lp_exec_mask_update(mask);
170 }
171
lp_exec_endswitch(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)172 static void lp_exec_endswitch(struct lp_exec_mask *mask,
173 struct lp_build_tgsi_context * bld_base)
174 {
175 LLVMBuilderRef builder = mask->bld->gallivm->builder;
176 struct function_ctx *ctx = func_ctx(mask);
177
178 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
179 ctx->switch_stack_size--;
180 return;
181 }
182
183 /* check if there's deferred default if so do it now */
184 if (ctx->switch_pc && !ctx->switch_in_default) {
185 LLVMValueRef prevmask, defaultmask;
186 unsigned tmp_pc;
187 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
188 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
189 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
190 ctx->switch_in_default = true;
191
192 lp_exec_mask_update(mask);
193
194 assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
195 TGSI_OPCODE_DEFAULT);
196
197 tmp_pc = bld_base->pc;
198 bld_base->pc = ctx->switch_pc;
199 /*
200 * re-purpose switch_pc to point to here again, since we stop execution of
201 * the deferred default after next break.
202 */
203 ctx->switch_pc = tmp_pc - 1;
204
205 return;
206 }
207
208 else if (ctx->switch_pc && ctx->switch_in_default) {
209 assert(bld_base->pc == ctx->switch_pc + 1);
210 }
211
212 ctx->switch_stack_size--;
213 mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
214 ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
215 ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
216 ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
217 ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;
218
219 ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];
220
221 lp_exec_mask_update(mask);
222 }
223
lp_exec_case(struct lp_exec_mask * mask,LLVMValueRef caseval)224 static void lp_exec_case(struct lp_exec_mask *mask,
225 LLVMValueRef caseval)
226 {
227 LLVMBuilderRef builder = mask->bld->gallivm->builder;
228 struct function_ctx *ctx = func_ctx(mask);
229
230 LLVMValueRef casemask, prevmask;
231
232 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
233 return;
234 }
235
236 /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */
237 if (!ctx->switch_in_default) {
238 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
239 casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val);
240 ctx->switch_mask_default = LLVMBuildOr(builder, casemask,
241 ctx->switch_mask_default, "sw_default_mask");
242 casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, "");
243 mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask");
244
245 lp_exec_mask_update(mask);
246 }
247 }
248
249 /*
250 * Analyse default statement in a switch.
251 * \return true if default is last statement, false otherwise
252 * \param default_pc_start contains pc of instruction to jump to
253 * if default wasn't last but there's no
254 * fallthrough into default.
255 */
default_analyse_is_last(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base,int * default_pc_start)256 static bool default_analyse_is_last(struct lp_exec_mask *mask,
257 struct lp_build_tgsi_context * bld_base,
258 int *default_pc_start)
259 {
260 unsigned pc = bld_base->pc;
261 struct function_ctx *ctx = func_ctx(mask);
262 int curr_switch_stack = ctx->switch_stack_size;
263
264 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
265 return false;
266 }
267
268 /* skip over case statements which are together with default */
269 while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
270 pc++;
271 }
272
273 while (pc != ~0u && pc < bld_base->num_instructions) {
274 enum tgsi_opcode opcode = bld_base->instructions[pc].Instruction.Opcode;
275 switch (opcode) {
276 case TGSI_OPCODE_CASE:
277 if (curr_switch_stack == ctx->switch_stack_size) {
278 *default_pc_start = pc - 1;
279 return false;
280 }
281 break;
282 case TGSI_OPCODE_SWITCH:
283 curr_switch_stack++;
284 break;
285 case TGSI_OPCODE_ENDSWITCH:
286 if (curr_switch_stack == ctx->switch_stack_size) {
287 *default_pc_start = pc - 1;
288 return true;
289 }
290 curr_switch_stack--;
291 break;
292 default:
293 ; /* nothing */
294 }
295 pc++;
296 }
297 /* should never arrive here */
298 assert(0);
299 return true;
300 }
301
lp_exec_default(struct lp_exec_mask * mask,struct lp_build_tgsi_context * bld_base)302 static void lp_exec_default(struct lp_exec_mask *mask,
303 struct lp_build_tgsi_context * bld_base)
304 {
305 LLVMBuilderRef builder = mask->bld->gallivm->builder;
306 struct function_ctx *ctx = func_ctx(mask);
307
308 int default_exec_pc = 0;
309 bool default_is_last;
310
311 if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
312 return;
313 }
314
315 /*
316 * This is a messy opcode, because it may not be always at the end and
317 * there can be fallthrough in and out of it.
318 */
319
320 default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
321 /*
322 * If it is last statement in switch (note that case statements appearing
323 * "at the same time" as default don't change that) everything is just fine,
324 * update switch mask and go on. This means we can handle default with
325 * fallthrough INTO it without overhead, if it is last.
326 */
327 if (default_is_last) {
328 LLVMValueRef prevmask, defaultmask;
329 prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
330 defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
331 defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
332 mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
333 ctx->switch_in_default = true;
334
335 lp_exec_mask_update(mask);
336 }
337 else {
338 /*
339 * Technically, "case" immediately before default isn't really a
340 * fallthrough, however we still have to count them as such as we
341 * already have updated the masks.
342 * If that happens in practice could add a switch optimizer pass
343 * which just gets rid of all case statements appearing together with
344 * default (or could do switch analysis at switch start time instead).
345 */
346 enum tgsi_opcode opcode =
347 bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
348 bool ft_into = (opcode != TGSI_OPCODE_BRK &&
349 opcode != TGSI_OPCODE_SWITCH);
350 /*
351 * If it is not last statement and there was no fallthrough into it,
352 * we record the PC and continue execution at next case (again, those
353 * case encountered at the same time don't count). At endswitch
354 * time, we update switchmask, and go back executing the code we skipped
355 * until the next break (possibly re-executing some code with changed mask
356 * if there was a fallthrough out of default).
357 * Finally, if it is not last statement and there was a fallthrough into it,
358 * do the same as with the former case, except instead of skipping the code
359 * just execute it without updating the mask, then go back and re-execute.
360 */
361 ctx->switch_pc = bld_base->pc;
362 if (!ft_into) {
363 bld_base->pc = default_exec_pc;
364 }
365 }
366 }
367
368
lp_exec_mask_call(struct lp_exec_mask * mask,int func,int * pc)369 static void lp_exec_mask_call(struct lp_exec_mask *mask,
370 int func,
371 int *pc)
372 {
373 if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) {
374 return;
375 }
376
377 lp_exec_mask_function_init(mask, mask->function_stack_size);
378 mask->function_stack[mask->function_stack_size].pc = *pc;
379 mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask;
380 mask->function_stack_size++;
381 *pc = func;
382 }
383
lp_exec_mask_ret(struct lp_exec_mask * mask,int * pc)384 static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
385 {
386 LLVMBuilderRef builder = mask->bld->gallivm->builder;
387 struct function_ctx *ctx = func_ctx(mask);
388 LLVMValueRef exec_mask;
389
390 if (ctx->cond_stack_size == 0 &&
391 ctx->loop_stack_size == 0 &&
392 ctx->switch_stack_size == 0 &&
393 mask->function_stack_size == 1) {
394 /* returning from main() */
395 *pc = -1;
396 return;
397 }
398
399 if (mask->function_stack_size == 1) {
400 /*
401 * This requires special handling since we need to ensure
402 * we don't drop the mask even if we have no call stack
403 * (e.g. after a ret in a if clause after the endif)
404 */
405 mask->ret_in_main = true;
406 }
407
408 exec_mask = LLVMBuildNot(builder,
409 mask->exec_mask,
410 "ret");
411
412 mask->ret_mask = LLVMBuildAnd(builder,
413 mask->ret_mask,
414 exec_mask, "ret_full");
415
416 lp_exec_mask_update(mask);
417 }
418
lp_exec_mask_bgnsub(struct lp_exec_mask * mask)419 static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
420 {
421 }
422
lp_exec_mask_endsub(struct lp_exec_mask * mask,int * pc)423 static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc)
424 {
425 struct function_ctx *ctx;
426
427 assert(mask->function_stack_size > 1);
428 assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS);
429
430 ctx = func_ctx(mask);
431 mask->function_stack_size--;
432
433 *pc = ctx->pc;
434 mask->ret_mask = ctx->ret_mask;
435
436 lp_exec_mask_update(mask);
437 }
438
439
440 static LLVMValueRef
get_file_ptr(struct lp_build_tgsi_soa_context * bld,unsigned file,int index,unsigned chan)441 get_file_ptr(struct lp_build_tgsi_soa_context *bld,
442 unsigned file,
443 int index,
444 unsigned chan)
445 {
446 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
447 LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS];
448 LLVMValueRef var_of_array;
449 LLVMTypeRef type_of_array;
450
451 switch (file) {
452 case TGSI_FILE_TEMPORARY:
453 array_of_vars = bld->temps;
454 var_of_array = bld->temps_array;
455 type_of_array = bld->temps_array_type;
456 break;
457 case TGSI_FILE_OUTPUT:
458 array_of_vars = bld->outputs;
459 var_of_array = bld->outputs_array;
460 type_of_array = bld->outputs_array_type;
461 break;
462 default:
463 assert(0);
464 return NULL;
465 }
466
467 assert(chan < 4);
468
469 if (bld->indirect_files & (1 << file)) {
470 LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan);
471 /* I'm not sure the other path ever gets hit, but leave until someone figures it out,
472 this check doesn't work with opaque pointers. */
473 if (1) {//LLVMGetTypeKind(LLVMGetElementType(LLVMTypeOf(var_of_array))) == LLVMArrayTypeKind) {
474 LLVMValueRef gep[2];
475 gep[0] = lp_build_const_int32(bld->bld_base.base.gallivm, 0);
476 gep[1] = lindex;
477 return LLVMBuildGEP2(builder, type_of_array, var_of_array, gep, 2, "");
478 } else {
479 return LLVMBuildGEP2(builder, type_of_array, var_of_array, &lindex, 1, "");
480 }
481 }
482 else {
483 assert(index <= bld->bld_base.info->file_max[file]);
484 return array_of_vars[index][chan];
485 }
486 }
487
488
489 /**
490 * Return pointer to a temporary register channel (src or dest).
491 * Note that indirect addressing cannot be handled here.
492 * \param index which temporary register
493 * \param chan which channel of the temp register.
494 */
495 LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)496 lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
497 unsigned index,
498 unsigned chan)
499 {
500 return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
501 }
502
503 /**
504 * Return pointer to a output register channel (src or dest).
505 * Note that indirect addressing cannot be handled here.
506 * \param index which output register
507 * \param chan which channel of the output register.
508 */
509 LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context * bld,unsigned index,unsigned chan)510 lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
511 unsigned index,
512 unsigned chan)
513 {
514 return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
515 }
516
517 /*
518 * If we have indirect addressing in outputs copy our alloca array
519 * to the outputs slots specified by the caller to make sure
520 * our outputs are delivered consistently via the same interface.
521 */
522 static void
gather_outputs(struct lp_build_tgsi_soa_context * bld)523 gather_outputs(struct lp_build_tgsi_soa_context * bld)
524 {
525 if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
526 unsigned index, chan;
527 assert(bld->bld_base.info->num_outputs <=
528 bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1);
529 for (index = 0; index < bld->bld_base.info->num_outputs; ++index) {
530 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
531 bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan);
532 }
533 }
534 }
535 }
536
537 /**
538 * Gather vector.
539 * XXX the lp_build_gather() function should be capable of doing this
540 * with a little work.
541 */
542 static LLVMValueRef
build_gather(struct lp_build_tgsi_context * bld_base,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)543 build_gather(struct lp_build_tgsi_context *bld_base,
544 LLVMValueRef base_ptr,
545 LLVMValueRef indexes,
546 LLVMValueRef overflow_mask,
547 LLVMValueRef indexes2)
548 {
549 struct gallivm_state *gallivm = bld_base->base.gallivm;
550 LLVMBuilderRef builder = gallivm->builder;
551 struct lp_build_context *uint_bld = &bld_base->uint_bld;
552 struct lp_build_context *bld = &bld_base->base;
553 LLVMValueRef res;
554 unsigned i;
555
556 if (indexes2)
557 res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
558 else
559 res = bld->undef;
560 /*
561 * overflow_mask is a vector telling us which channels
562 * in the vector overflowed. We use the overflow behavior for
563 * constant buffers which is defined as:
564 * Out of bounds access to constant buffer returns 0 in all
565 * components. Out of bounds behavior is always with respect
566 * to the size of the buffer bound at that slot.
567 */
568
569 if (overflow_mask) {
570 /*
571 * We avoid per-element control flow here (also due to llvm going crazy,
572 * though I suspect it's better anyway since overflow is likely rare).
573 * Note that since we still fetch from buffers even if num_elements was
574 * zero (in this case we'll fetch from index zero) the jit func callers
575 * MUST provide valid fake constant buffers of size 4x32 (the values do
576 * not matter), otherwise we'd still need (not per element though)
577 * control flow.
578 */
579 indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
580 if (indexes2)
581 indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
582 }
583
584 /*
585 * Loop over elements of index_vec, load scalar value, insert it into 'res'.
586 */
587 for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
588 LLVMValueRef si, di;
589 LLVMValueRef index;
590 LLVMValueRef scalar_ptr, scalar;
591
592 di = lp_build_const_int32(bld->gallivm, i);
593 if (indexes2)
594 si = lp_build_const_int32(bld->gallivm, i >> 1);
595 else
596 si = di;
597
598 if (indexes2 && (i & 1)) {
599 index = LLVMBuildExtractElement(builder,
600 indexes2, si, "");
601 } else {
602 index = LLVMBuildExtractElement(builder,
603 indexes, si, "");
604 }
605 scalar_ptr = LLVMBuildGEP2(builder, bld->elem_type, base_ptr,
606 &index, 1, "gather_ptr");
607 scalar = LLVMBuildLoad2(builder, bld->elem_type, scalar_ptr, "");
608
609 res = LLVMBuildInsertElement(builder, res, scalar, di, "");
610 }
611
612 if (overflow_mask) {
613 if (indexes2) {
614 res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
615 overflow_mask = LLVMBuildSExt(builder, overflow_mask,
616 bld_base->dbl_bld.int_vec_type, "");
617 res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
618 bld_base->dbl_bld.zero, res);
619 } else
620 res = lp_build_select(bld, overflow_mask, bld->zero, res);
621 }
622
623 return res;
624 }
625
626
627 /**
628 * Scatter/store vector.
629 */
630 static void
emit_mask_scatter(struct lp_build_tgsi_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)631 emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
632 LLVMValueRef base_ptr,
633 LLVMValueRef indexes,
634 LLVMValueRef values,
635 struct lp_exec_mask *mask)
636 {
637 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
638 LLVMBuilderRef builder = gallivm->builder;
639 unsigned i;
640 LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
641
642 /*
643 * Loop over elements of index_vec, store scalar value.
644 */
645 for (i = 0; i < bld->bld_base.base.type.length; i++) {
646 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
647 LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
648 LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, bld->bld_base.base.elem_type, base_ptr, &index, 1, "scatter_ptr");
649 LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
650 LLVMValueRef scalar_pred = pred ?
651 LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
652
653 if (0)
654 lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
655 ii, val, index, scalar_ptr);
656
657 if (scalar_pred) {
658 LLVMValueRef real_val, dst_val;
659 dst_val = LLVMBuildLoad2(builder, bld->bld_base.base.elem_type, scalar_ptr, "");
660 real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
661 LLVMBuildStore(builder, real_val, scalar_ptr);
662 }
663 else {
664 LLVMBuildStore(builder, val, scalar_ptr);
665 }
666 }
667 }
668
669
670 /**
671 * Read the current value of the ADDR register, convert the floats to
672 * ints, add the base index and return the vector of offsets.
673 * The offsets will be used to index into the constant buffer or
674 * temporary register file.
675 */
676 static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context * bld,unsigned reg_file,unsigned reg_index,const struct tgsi_ind_register * indirect_reg,int index_limit)677 get_indirect_index(struct lp_build_tgsi_soa_context *bld,
678 unsigned reg_file, unsigned reg_index,
679 const struct tgsi_ind_register *indirect_reg,
680 int index_limit)
681 {
682 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
683 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
684 /* always use X component of address register */
685 unsigned swizzle = indirect_reg->Swizzle;
686 LLVMValueRef base;
687 LLVMValueRef rel;
688 LLVMValueRef max_index;
689 LLVMValueRef index;
690
691 assert(bld->indirect_files & (1 << reg_file));
692
693 base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);
694
695 assert(swizzle < 4);
696 switch (indirect_reg->File) {
697 case TGSI_FILE_ADDRESS:
698 rel = LLVMBuildLoad2(builder,
699 bld->bld_base.base.int_vec_type,
700 bld->addr[indirect_reg->Index][swizzle],
701 "load addr reg");
702 /* ADDR LLVM values already have LLVM integer type. */
703 break;
704 case TGSI_FILE_TEMPORARY:
705 rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
706 rel = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, rel, "load temp reg");
707 /* TEMP LLVM values always have LLVM float type, but for indirection, the
708 * value actually stored is expected to be an integer */
709 rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
710 break;
711 default:
712 assert(0);
713 rel = uint_bld->zero;
714 }
715
716 index = lp_build_add(uint_bld, base, rel);
717
718 /*
719 * emit_fetch_constant handles constant buffer overflow so this code
720 * is pointless for them.
721 * Furthermore the D3D10 spec in section 6.5 says:
722 * If the constant buffer bound to a slot is larger than the size
723 * declared in the shader for that slot, implementations are allowed
724 * to return incorrect data (not necessarily 0) for indices that are
725 * larger than the declared size but smaller than the buffer size.
726 */
727 if (reg_file != TGSI_FILE_CONSTANT) {
728 assert(index_limit >= 0);
729 max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
730 uint_bld->type, index_limit);
731
732 assert(!uint_bld->type.sign);
733 index = lp_build_min(uint_bld, index, max_index);
734 }
735
736 return index;
737 }
738
739 static struct lp_build_context *
stype_to_fetch(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type stype)740 stype_to_fetch(struct lp_build_tgsi_context * bld_base,
741 enum tgsi_opcode_type stype)
742 {
743 struct lp_build_context *bld_fetch;
744
745 switch (stype) {
746 case TGSI_TYPE_FLOAT:
747 case TGSI_TYPE_UNTYPED:
748 bld_fetch = &bld_base->base;
749 break;
750 case TGSI_TYPE_UNSIGNED:
751 bld_fetch = &bld_base->uint_bld;
752 break;
753 case TGSI_TYPE_SIGNED:
754 bld_fetch = &bld_base->int_bld;
755 break;
756 case TGSI_TYPE_DOUBLE:
757 bld_fetch = &bld_base->dbl_bld;
758 break;
759 case TGSI_TYPE_UNSIGNED64:
760 bld_fetch = &bld_base->uint64_bld;
761 break;
762 case TGSI_TYPE_SIGNED64:
763 bld_fetch = &bld_base->int64_bld;
764 break;
765 case TGSI_TYPE_VOID:
766 default:
767 assert(0);
768 bld_fetch = NULL;
769 break;
770 }
771 return bld_fetch;
772 }
773
774 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,unsigned chan_index,bool need_perelement_offset)775 get_soa_array_offsets(struct lp_build_context *uint_bld,
776 LLVMValueRef indirect_index,
777 unsigned chan_index,
778 bool need_perelement_offset)
779 {
780 struct gallivm_state *gallivm = uint_bld->gallivm;
781 LLVMValueRef chan_vec =
782 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
783 LLVMValueRef length_vec =
784 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
785 LLVMValueRef index_vec;
786
787 /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */
788 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
789 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
790 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
791
792 if (need_perelement_offset) {
793 LLVMValueRef pixel_offsets;
794 unsigned i;
795 /* build pixel offset vector: {0, 1, 2, 3, ...} */
796 pixel_offsets = uint_bld->undef;
797 for (i = 0; i < uint_bld->type.length; i++) {
798 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
799 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
800 ii, ii, "");
801 }
802 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
803 }
804 return index_vec;
805 }
806
807 static LLVMValueRef
emit_fetch_constant(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_src_register * reg,enum tgsi_opcode_type stype,unsigned swizzle_in)808 emit_fetch_constant(
809 struct lp_build_tgsi_context * bld_base,
810 const struct tgsi_full_src_register * reg,
811 enum tgsi_opcode_type stype,
812 unsigned swizzle_in)
813 {
814 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
815 struct gallivm_state *gallivm = bld_base->base.gallivm;
816 LLVMBuilderRef builder = gallivm->builder;
817 struct lp_build_context *uint_bld = &bld_base->uint_bld;
818 unsigned dimension = 0;
819 LLVMValueRef consts_ptr;
820 LLVMValueRef num_consts;
821 LLVMValueRef res;
822 unsigned swizzle = swizzle_in & 0xffff;
823
824 /* XXX: Handle fetching xyzw components as a vector */
825 assert(swizzle != ~0u);
826
827 if (reg->Register.Dimension) {
828 assert(!reg->Dimension.Indirect);
829 dimension = reg->Dimension.Index;
830 assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
831 }
832
833 consts_ptr = bld->consts[dimension];
834 num_consts = bld->consts_sizes[dimension];
835
836 if (reg->Register.Indirect) {
837 LLVMValueRef indirect_index;
838 LLVMValueRef swizzle_vec =
839 lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
840 LLVMValueRef index_vec; /* index into the const buffer */
841 LLVMValueRef overflow_mask;
842 LLVMValueRef index_vec2 = NULL;
843
844 indirect_index = get_indirect_index(bld,
845 reg->Register.File,
846 reg->Register.Index,
847 ®->Indirect,
848 bld->bld_base.info->file_max[reg->Register.File]);
849
850 /* All fetches are from the same constant buffer, so
851 * we need to propagate the size to a vector to do a
852 * vector comparison */
853 num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
854 /* Construct a boolean vector telling us which channels
855 * overflow the bound constant buffer */
856 overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
857 indirect_index, num_consts);
858
859 /* index_vec = indirect_index * 4 + swizzle */
860 index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
861 index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);
862
863 if (tgsi_type_is_64bit(stype)) {
864 LLVMValueRef swizzle_vec2;
865 swizzle_vec2 = lp_build_const_int_vec(gallivm, uint_bld->type, swizzle_in >> 16);
866 index_vec2 = lp_build_shl_imm(uint_bld, indirect_index, 2);
867 index_vec2 = lp_build_add(uint_bld, index_vec2, swizzle_vec2);
868 }
869 /* Gather values from the constant buffer */
870 res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, index_vec2);
871 }
872 else {
873 LLVMValueRef index; /* index into the const buffer */
874 LLVMValueRef scalar, scalar_ptr;
875 struct lp_build_context *bld_broad = &bld_base->base;
876 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
877
878 scalar_ptr = LLVMBuildGEP2(builder, bld_broad->elem_type, consts_ptr,
879 &index, 1, "");
880
881 if (tgsi_type_is_64bit(stype) && ((swizzle_in >> 16) != swizzle + 1)) {
882
883 LLVMValueRef scalar2, scalar2_ptr;
884 LLVMValueRef shuffles[2];
885 index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + (swizzle_in >> 16));
886
887 scalar2_ptr = LLVMBuildGEP2(builder, bld_broad->elem_type, consts_ptr,
888 &index, 1, "");
889
890 scalar = LLVMBuildLoad2(builder, bld_broad->elem_type, scalar_ptr, "");
891 scalar2 = LLVMBuildLoad2(builder, bld_broad->elem_type, scalar2_ptr, "");
892 shuffles[0] = lp_build_const_int32(gallivm, 0);
893 shuffles[1] = lp_build_const_int32(gallivm, 1);
894
895 res = LLVMGetUndef(LLVMVectorType(bld_broad->elem_type, bld_base->base.type.length * 2));
896 res = LLVMBuildInsertElement(builder, res, scalar, shuffles[0], "");
897 res = LLVMBuildInsertElement(builder, res, scalar2, shuffles[1], "");
898 } else {
899 if (stype == TGSI_TYPE_DOUBLE) {
900 LLVMTypeRef dptr_type = LLVMPointerType(LLVMDoubleTypeInContext(gallivm->context), 0);
901 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, dptr_type, "");
902 bld_broad = &bld_base->dbl_bld;
903 } else if (stype == TGSI_TYPE_UNSIGNED64) {
904 LLVMTypeRef u64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
905 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, u64ptr_type, "");
906 bld_broad = &bld_base->uint64_bld;
907 } else if (stype == TGSI_TYPE_SIGNED64) {
908 LLVMTypeRef i64ptr_type = LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0);
909 scalar_ptr = LLVMBuildBitCast(builder, scalar_ptr, i64ptr_type, "");
910 bld_broad = &bld_base->int64_bld;
911 }
912 scalar = LLVMBuildLoad2(builder, bld_broad->elem_type, scalar_ptr, "");
913 res = lp_build_broadcast_scalar(bld_broad, scalar);
914 }
915
916 }
917
918 if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || stype == TGSI_TYPE_DOUBLE || stype == TGSI_TYPE_SIGNED64 || stype == TGSI_TYPE_UNSIGNED64) {
919 struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
920 res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
921 }
922
923 return res;
924 }
925
/**
 * Fetch 64-bit values from two separate channels.
 * 64-bit values are stored split across two channels, like xy and zw.
 * This function creates a set of vec_length*2 floats,
 * extracts the values from the two channels,
 * puts them in the correct place, then casts to vec_length 64-bits.
 *
 * \param stype   64-bit TGSI type requested (selects the destination
 *                build context via stype_to_fetch)
 * \param input   vector holding the low 32 bits of each value
 * \param input2  vector holding the high 32 bits of each value
 * \return interleaved pair bitcast to the 64-bit vec type for stype
 */
static LLVMValueRef
emit_fetch_64bit(
   struct lp_build_tgsi_context * bld_base,
   enum tgsi_opcode_type stype,
   LLVMValueRef input,
   LLVMValueRef input2)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
   int i;
   LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
   int len = bld_base->base.type.length * 2;
   assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));

   /* Build an interleaving shuffle mask: even result lanes pick lane i/2
    * of input, odd lanes pick lane i/2 of input2 (whose lanes appear at
    * offset type.length in the concatenated shuffle operand space). */
   for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
      shuffles[i] = lp_build_const_int32(gallivm, i / 2);
      shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
   }
   res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");

   /* Reinterpret the 2*length 32-bit vector as a length-wide 64-bit vector. */
   return LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
}
958
/**
 * Fetch a value from the immediate register file as a SoA vector.
 *
 * \param reg         source register (index, indirect flag)
 * \param stype       TGSI type the caller wants the result as
 * \param swizzle_in  low 16 bits: first channel; high 16 bits: second
 *                    channel (only meaningful for 64-bit types)
 * \return the fetched vector, bitcast to the vec type matching stype
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;
   unsigned swizzle = swizzle_in & 0xffff;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec; /* index into the immediate register array */
         LLVMValueRef index_vec2 = NULL;
         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect,
                                             bld->bld_base.info->file_max[reg->Register.File]);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           false);
         if (tgsi_type_is_64bit(stype))
            index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                               indirect_index,
                                               swizzle_in >> 16,
                                               false);
         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL, index_vec2);
      } else {
         /* Direct access: load the whole vector for (reg, channel). */
         LLVMValueRef gep[2];
         gep[0] = lp_build_const_int32(gallivm, 0);
         gep[1] = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP2(builder,
                                               bld_base->base.vec_type,
                                               bld->imms_array, gep, 2, "");
         res = LLVMBuildLoad2(builder, bld_base->base.vec_type, imms_ptr, "");

         if (tgsi_type_is_64bit(stype)) {
            /* A 64-bit value spans two channels: load the second half
             * and interleave the pair. */
            LLVMValueRef imms_ptr2;
            LLVMValueRef res2;
            gep[1] = lp_build_const_int32(gallivm,
                                          reg->Register.Index * 4 + (swizzle_in >> 16));
            imms_ptr2 = LLVMBuildGEP2(builder, bld_base->base.vec_type,
                                      bld->imms_array, gep, 2, "");
            res2 = LLVMBuildLoad2(builder, bld_base->base.vec_type, imms_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
   }
   else {
      /* Immediates kept as plain LLVM values, one per (reg, channel). */
      res = bld->immediates[reg->Register.Index][swizzle];
      if (tgsi_type_is_64bit(stype))
         res = emit_fetch_64bit(bld_base, stype, res, bld->immediates[reg->Register.Index][swizzle_in >> 16]);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      /* Values are held in the float build context's type; bitcast to
       * the integer/64-bit vec type the caller asked for. */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }
   return res;
}
1039
/**
 * Fetch a value from the shader input register file as a SoA vector.
 *
 * Three paths: indirect addressing (gather from the inputs array),
 * direct access through the inputs array (when the INPUT file uses
 * indirect addressing anywhere in the shader), or direct access to the
 * pre-loaded bld->inputs values.
 *
 * \param reg         source register (index, indirect flag)
 * \param stype       TGSI type the caller wants the result as
 * \param swizzle_in  low 16 bits: first channel; high 16 bits: second
 *                    channel (only meaningful for 64-bit types)
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec; /* index into the input reg array */
      LLVMValueRef index_vec2 = NULL;
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        true);
      if (tgsi_type_is_64bit(stype)) {
         /* Second set of offsets for the high half of 64-bit values. */
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            true);
      }
      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL, index_vec2);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* Inputs live in memory (some other access is indirect):
          * load directly from the array. */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                    reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP2(builder, bld_base->base.vec_type,
                                                bld->inputs_array, &lindex, 1, "");

         res = LLVMBuildLoad2(builder, bld_base->base.vec_type, input_ptr, "");
         if (tgsi_type_is_64bit(stype)) {
            LLVMValueRef lindex1;
            LLVMValueRef input_ptr2;
            LLVMValueRef res2;

            lindex1 = lp_build_const_int32(gallivm,
                                           reg->Register.Index * 4 + (swizzle_in >> 16));
            input_ptr2 = LLVMBuildGEP2(builder, bld_base->base.vec_type,
                                       bld->inputs_array, &lindex1, 1, "");
            res2 = LLVMBuildLoad2(builder, bld_base->base.vec_type, input_ptr2, "");
            res = emit_fetch_64bit(bld_base, stype, res, res2);
         }
      }
      else {
         /* Fast path: inputs are plain SSA values. */
         res = bld->inputs[reg->Register.Index][swizzle];
         if (tgsi_type_is_64bit(stype))
            res = emit_fetch_64bit(bld_base, stype, res, bld->inputs[reg->Register.Index][swizzle_in >> 16]);
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED || tgsi_type_is_64bit(stype)) {
      /* Inputs are held as floats; bitcast to the requested vec type. */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1119
1120
/**
 * Fetch a geometry-shader input via the gs_iface callback.
 *
 * GS inputs are two-dimensional (vertex index, attribute index); both
 * dimensions may be indirect. PRIMID is special-cased as a system value.
 *
 * \param reg         source register (2-D: Dimension = vertex, Register = attrib)
 * \param stype       TGSI type the caller wants the result as
 * \param swizzle_in  low 16 bits: first channel; high 16 bits: second
 *                    channel (only meaningful for 64-bit types)
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      /*
       * A fixed 6 should do as well (which is what we allocate).
       */
      int index_limit = mesa_vertices_per_prim(info->properties[TGSI_PROPERTY_GS_INPUT_PRIM]);
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        index_limit);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   res = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* Fetch the second channel and combine; this swizzle_index
       * intentionally shadows the outer one. */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      res2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                        reg->Dimension.Indirect,
                                        vertex_index,
                                        reg->Register.Indirect,
                                        attrib_index,
                                        swizzle_index);
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1208
/**
 * Fetch a tessellation-control-shader input via the tcs_iface callbacks.
 *
 * Like GS inputs, TCS inputs are two-dimensional (vertex, attribute).
 * A TCS may also read back its own outputs, in which case the register
 * file is TGSI_FILE_OUTPUT and emit_fetch_output() is used instead of
 * emit_fetch_input(). PRIMID is special-cased as a system value.
 *
 * \param reg         source register (2-D: Dimension = vertex, Register = attrib)
 * \param stype       TGSI type the caller wants the result as
 * \param swizzle_in  low 16 bits: first channel; high 16 bits: second
 *                    channel (only meaningful for 64-bit types)
 */
static LLVMValueRef
emit_fetch_tcs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   // TCS can read from its own outputs
   if (reg->Register.File == TGSI_FILE_OUTPUT) {
      res = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                              reg->Dimension.Indirect,
                                              vertex_index,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              false,
                                              swizzle_index,
                                              bld_base->info->output_semantic_name[reg->Register.Index]);
   } else {
      res = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                             reg->Dimension.Indirect,
                                             vertex_index,
                                             reg->Register.Indirect,
                                             attrib_index,
                                             false,
                                             swizzle_index);
   }


   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* Fetch the second channel for the high 32 bits; this
       * swizzle_index intentionally shadows the outer one. */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (reg->Register.File == TGSI_FILE_OUTPUT) {
         res2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                  reg->Dimension.Indirect,
                                                  vertex_index,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  false,
                                                  swizzle_index,
                                                  bld_base->info->output_semantic_name[reg->Register.Index]);
      } else {
         res2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                                 reg->Dimension.Indirect,
                                                 vertex_index,
                                                 reg->Register.Indirect,
                                                 attrib_index,
                                                 false,
                                                 swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1311
/**
 * Fetch a tessellation-evaluation-shader input via the tes_iface callbacks.
 *
 * Per-patch inputs (TGSI_SEMANTIC_PATCH) use fetch_patch_input (no vertex
 * dimension); everything else uses fetch_vertex_input with a (vertex,
 * attribute) pair. PRIMID is special-cased as a system value.
 *
 * \param reg         source register (2-D: Dimension = vertex, Register = attrib)
 * \param stype       TGSI type the caller wants the result as
 * \param swizzle_in  low 16 bits: first channel; high 16 bits: second
 *                    channel (only meaningful for 64-bit types)
 */
static LLVMValueRef
emit_fetch_tes_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   unsigned swizzle = swizzle_in & 0xffff;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_INPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
      res = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                              reg->Register.Indirect,
                                              attrib_index,
                                              swizzle_index);
   } else {
      res = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                               reg->Dimension.Indirect,
                                               vertex_index,
                                               reg->Register.Indirect,
                                               attrib_index,
                                               false,
                                               swizzle_index);
   }

   assert(res);
   if (tgsi_type_is_64bit(stype)) {
      /* Fetch the second channel for the high 32 bits; this
       * swizzle_index intentionally shadows the outer one. */
      LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle_in >> 16);
      LLVMValueRef res2;
      if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PATCH) {
         res2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                  reg->Register.Indirect,
                                                  attrib_index,
                                                  swizzle_index);
      }
      else {
         res2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, (struct lp_build_context*)bld_base,
                                                   reg->Dimension.Indirect,
                                                   vertex_index,
                                                   reg->Register.Indirect,
                                                   attrib_index,
                                                   false,
                                                   swizzle_index);
      }
      assert(res2);
      res = emit_fetch_64bit(bld_base, stype, res, res2);
   } else if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
1405
1406
1407
/**
 * Fetch a value from the temporary register file as a SoA vector.
 *
 * Indirect access gathers from the temps array; direct access loads
 * from the per-channel pointer returned by lp_get_temp_ptr_soa().
 *
 * \param reg         source register (index, indirect flag)
 * \param stype       TGSI type the caller wants the result as
 * \param swizzle_in  low 16 bits: first channel; high 16 bits: second
 *                    channel (only meaningful for 64-bit types)
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   unsigned swizzle = swizzle_in & 0xffff;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec, index_vec2 = NULL; /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect,
                                          bld->bld_base.info->file_max[reg->Register.File]);

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        true);
      if (tgsi_type_is_64bit(stype)) {
         /* Second set of offsets for the high half of 64-bit values. */
         index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                            indirect_index,
                                            swizzle_in >> 16,
                                            true);
      }

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL, index_vec2);
   }
   else {
      LLVMValueRef temp_ptr;
      LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad2(builder, vec_type, temp_ptr, "");

      if (tgsi_type_is_64bit(stype)) {
         /* A 64-bit value spans two channels: load the second half
          * and interleave the pair. */
         LLVMValueRef temp_ptr2, res2;

         temp_ptr2 = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle_in >> 16);
         res2 = LLVMBuildLoad2(builder, vec_type, temp_ptr2, "");
         res = emit_fetch_64bit(bld_base, stype, res, res2);
      }
   }

   if (stype == TGSI_TYPE_SIGNED ||
       stype == TGSI_TYPE_UNSIGNED ||
       stype == TGSI_TYPE_DOUBLE ||
       stype == TGSI_TYPE_SIGNED64 ||
       stype == TGSI_TYPE_UNSIGNED64) {
      /* Temps are stored as floats; bitcast to the requested vec type. */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
1477
/**
 * Fetch a system value (instance id, vertex id, thread id, tess coords,
 * etc.) as a SoA vector.
 *
 * Each semantic is either already a vector in bld->system_values or a
 * scalar that is broadcast here. The actual type of each value (atype)
 * is tracked so the result can be bitcast to the requested stype at the
 * end. Indirect addressing is not supported for system values.
 *
 * \param reg         source register; only reg->Register.Index is used
 * \param stype       TGSI type the caller wants the result as
 * \param swizzle_in  channel selector (used by multi-component values
 *                    such as THREAD_ID / BLOCK_ID / GRID_SIZE)
 */
static LLVMValueRef
emit_fetch_system_value(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle_in)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;
   enum tgsi_opcode_type atype; // Actual type of the value
   unsigned swizzle = swizzle_in & 0xffff;

   assert(!reg->Register.Indirect);

   switch (info->system_value_semantic_name[reg->Register.Index]) {
   case TGSI_SEMANTIC_INSTANCEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID:
      res = bld->system_values.vertex_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_VERTEXID_NOBASE:
      res = bld->system_values.vertex_id_nobase;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEVERTEX:
      res = bld->system_values.basevertex;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BASEINSTANCE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_PRIMID:
      res = bld->system_values.prim_id;
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_INVOCATIONID:
      /* TCS already has a per-lane vector; other stages store a scalar. */
      if (info->processor == PIPE_SHADER_TESS_CTRL)
         res = bld->system_values.invocation_id;
      else
         res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_HELPER_INVOCATION:
      /* Helper invocations are the lanes NOT in the execution mask. */
      res = LLVMBuildNot(gallivm->builder, lp_build_mask_value(bld->mask), "");
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_THREAD_ID:
      res = bld->system_values.thread_id[swizzle];
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_BLOCK_ID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_id[swizzle]);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_GRID_SIZE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.grid_size[swizzle]);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSCOORD:
      {
         LLVMValueRef index[] = { lp_build_const_int32(gallivm, 0), lp_build_const_int32(gallivm, swizzle_in) };
         LLVMValueRef array_indexed = LLVMBuildGEP2(gallivm->builder, bld->bld_base.base.vec_type,
                                                    bld->system_values.tess_coord, index, 2, "tess_coord_array_indexed");
         res = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, array_indexed, "tess_coord");
      }
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_FACE:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_DRAWID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_SAMPLEID:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   case TGSI_SEMANTIC_TESSOUTER:
      /* tess factors live in a small fixed-size vector; pick the
       * requested element and broadcast it to the SoA width. */
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_outer,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_TESSINNER:
      res = lp_build_extract_broadcast(gallivm, lp_type_float_vec(32, 128), bld_base->base.type,
                                       bld->system_values.tess_inner,
                                       lp_build_const_int32(gallivm, swizzle_in));
      atype = TGSI_TYPE_FLOAT;
      break;

   case TGSI_SEMANTIC_VERTICESIN:
      res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.vertices_in);
      atype = TGSI_TYPE_UNSIGNED;
      break;

   default:
      assert(!"unexpected semantic in emit_fetch_system_value");
      res = bld_base->base.zero;
      atype = TGSI_TYPE_FLOAT;
      break;
   }

   /* Bitcast to the requested type if it differs from the stored type. */
   if (atype != stype) {
      if (stype == TGSI_TYPE_FLOAT) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      } else if (stype == TGSI_TYPE_UNSIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
      } else if (stype == TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
      }
   }

   return res;
}
1617
1618 /**
1619 * Register fetch with derivatives.
1620 */
1621 static void
emit_fetch_deriv(struct lp_build_tgsi_soa_context * bld,LLVMValueRef src,LLVMValueRef * res,LLVMValueRef * ddx,LLVMValueRef * ddy)1622 emit_fetch_deriv(
1623 struct lp_build_tgsi_soa_context *bld,
1624 LLVMValueRef src,
1625 LLVMValueRef *res,
1626 LLVMValueRef *ddx,
1627 LLVMValueRef *ddy)
1628 {
1629 if (res)
1630 *res = src;
1631
1632 /* TODO: use interpolation coeffs for inputs */
1633
1634 if (ddx)
1635 *ddx = lp_build_ddx(&bld->bld_base.base, src);
1636
1637 if (ddy)
1638 *ddy = lp_build_ddy(&bld->bld_base.base, src);
1639 }
1640
/**
 * store an array of vec-length 64-bit into two arrays of vec_length floats
 * i.e.
 * value is d0, d1, d2, d3 etc.
 * each 64-bit has high and low pieces x, y
 * so gets stored into the separate channels as:
 * chan_ptr = d0.x, d1.x, d2.x, d3.x
 * chan_ptr2 = d0.y, d1.y, d2.y, d3.y
 *
 * \param chan_ptr   destination for the even (low) 32-bit halves
 * \param chan_ptr2  destination for the odd (high) 32-bit halves
 * \param value      64-bit source, viewed as 2*length 32-bit lanes
 */
static void
emit_store_64bit_chan(struct lp_build_tgsi_context *bld_base,
                      LLVMValueRef chan_ptr, LLVMValueRef chan_ptr2,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;
   unsigned i;
   LLVMValueRef temp, temp2;
   LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
   LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];

   /* De-interleave: even lanes (low halves) into shuffles,
    * odd lanes (high halves) into shuffles2. */
   for (i = 0; i < bld_base->base.type.length; i++) {
      shuffles[i] = lp_build_const_int32(gallivm, i * 2);
      shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
   }

   temp = LLVMBuildShuffleVector(builder, value,
                                 LLVMGetUndef(LLVMTypeOf(value)),
                                 LLVMConstVector(shuffles,
                                                 bld_base->base.type.length),
                                 "");
   temp2 = LLVMBuildShuffleVector(builder, value,
                                  LLVMGetUndef(LLVMTypeOf(value)),
                                  LLVMConstVector(shuffles2,
                                                  bld_base->base.type.length),
                                  "");

   /* Stores are predicated on the current execution mask. */
   lp_exec_mask_store(&bld->exec_mask, float_bld, temp, chan_ptr);
   lp_exec_mask_store(&bld->exec_mask, float_bld, temp2, chan_ptr2);
}
1683
/**
 * Store a value to the output register file.
 *
 * Indirect destinations scatter into the outputs array under the
 * execution mask; direct destinations store through the per-channel
 * output pointer (with the 64-bit case split across two channels).
 *
 * \param dtype           destination TGSI type (64-bit types use two channels)
 * \param reg             destination register description
 * \param index           unused here (part of the common store signature)
 * \param chan_index      destination channel (0..3)
 * \param indirect_index  per-lane register index when reg is indirect
 * \param value           value to store; bitcast to float vec first
 */
static void
emit_store_output(struct lp_build_tgsi_context *bld_base,
                  enum tgsi_opcode_type dtype,
                  const struct tgsi_full_dst_register *reg,
                  unsigned index,
                  unsigned chan_index,
                  LLVMValueRef indirect_index,
                  LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   /* Outputs are always stored as floats */
   value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");

   if (reg->Register.Indirect) {
      LLVMValueRef index_vec; /* indexes into the output registers */
      LLVMValueRef outputs_array;
      LLVMTypeRef fptr_type;

      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        chan_index,
                                        true);

      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, "");

      /* Scatter store values into output registers */
      emit_mask_scatter(bld, outputs_array, index_vec, value,
                        &bld->exec_mask);
   }
   else {
      assert(LLVMTypeOf(value) == float_bld->vec_type);
      LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index,
                                               chan_index);

      if (tgsi_type_is_64bit(dtype)) {
         /* 64-bit stores split low/high halves across chan and chan+1. */
         LLVMValueRef out_ptr2 = lp_get_output_ptr(bld, reg->Register.Index,
                                                   chan_index + 1);
         emit_store_64bit_chan(bld_base, out_ptr, out_ptr2,
                               value);
      } else
         lp_exec_mask_store(&bld->exec_mask, float_bld, value, out_ptr);
   }
}
1732
/**
 * Store a tessellation-control-shader output via the tcs_iface callback.
 *
 * TCS outputs are two-dimensional (vertex, attribute); both dimensions
 * may be indirect. The store itself is delegated to
 * tcs_iface->emit_store_output, predicated on the current exec mask.
 *
 * \param dtype           destination TGSI type (unused here; iface handles it)
 * \param reg             destination register (2-D: Dimension = vertex)
 * \param index           unused here (part of the common store signature)
 * \param chan_index      destination channel (0..3)
 * \param indirect_index  unused; indirect indices are recomputed below
 * \param value           value to store
 */
static void
emit_store_tcs_output(struct lp_build_tgsi_context *bld_base,
                      enum tgsi_opcode_type dtype,
                      const struct tgsi_full_dst_register *reg,
                      unsigned index,
                      unsigned chan_index,
                      LLVMValueRef indirect_index,
                      LLVMValueRef value)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef channel_index = NULL;

   if (reg->Register.Indirect) {
      /*
       * XXX: this is possibly not quite the right value, since file_max may be
       * larger than the max attrib index, due to it being the max of declared
       * inputs AND the max vertices per prim (which is 6 for tri adj).
       * It should however be safe to use (since we always allocate
       * PIPE_MAX_SHADER_INPUTS (80) for it, which is overallocated quite a bit).
       */
      int index_limit = info->file_max[reg->Register.File];
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect,
                                        index_limit);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect,
                                        PIPE_MAX_SHADER_OUTPUTS);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   channel_index = lp_build_const_int32(gallivm, chan_index);

   assert(bld->tcs_iface->emit_store_output);
   bld->tcs_iface->emit_store_output(bld->tcs_iface, (struct lp_build_context*)bld_base,
                                     bld_base->info->output_semantic_name[reg->Register.Index],
                                     reg->Dimension.Indirect,
                                     vertex_index,
                                     reg->Register.Indirect,
                                     attrib_index,
                                     false,
                                     channel_index,
                                     value,
                                     mask_vec(bld_base));
}
1791
1792 static void
emit_store_temp(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1793 emit_store_temp(struct lp_build_tgsi_context *bld_base,
1794 enum tgsi_opcode_type dtype,
1795 const struct tgsi_full_dst_register *reg,
1796 unsigned index,
1797 unsigned chan_index,
1798 LLVMValueRef indirect_index,
1799 LLVMValueRef value)
1800 {
1801 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1802 struct gallivm_state *gallivm = bld_base->base.gallivm;
1803 LLVMBuilderRef builder = gallivm->builder;
1804 struct lp_build_context *float_bld = &bld_base->base;
1805
1806 /* Temporaries are always stored as floats */
1807 if (!tgsi_type_is_64bit(dtype))
1808 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1809 else
1810 value = LLVMBuildBitCast(builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2), "");
1811
1812 if (reg->Register.Indirect) {
1813 LLVMValueRef index_vec; /* indexes into the temp registers */
1814 LLVMValueRef temps_array;
1815 LLVMTypeRef fptr_type;
1816
1817 index_vec = get_soa_array_offsets(&bld_base->uint_bld,
1818 indirect_index,
1819 chan_index,
1820 true);
1821
1822 fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
1823 temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");
1824
1825 /* Scatter store values into temp registers */
1826 emit_mask_scatter(bld, temps_array, index_vec, value,
1827 &bld->exec_mask);
1828 }
1829 else {
1830 LLVMValueRef temp_ptr;
1831 temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index);
1832
1833 if (tgsi_type_is_64bit(dtype)) {
1834 LLVMValueRef temp_ptr2 = lp_get_temp_ptr_soa(bld,
1835 reg->Register.Index,
1836 chan_index + 1);
1837 emit_store_64bit_chan(bld_base, temp_ptr, temp_ptr2,
1838 value);
1839 }
1840 else
1841 lp_exec_mask_store(&bld->exec_mask, float_bld, value, temp_ptr);
1842 }
1843 }
1844
1845 static void
emit_store_address(struct lp_build_tgsi_context * bld_base,enum tgsi_opcode_type dtype,const struct tgsi_full_dst_register * reg,unsigned index,unsigned chan_index,LLVMValueRef indirect_index,LLVMValueRef value)1846 emit_store_address(struct lp_build_tgsi_context *bld_base,
1847 enum tgsi_opcode_type dtype,
1848 const struct tgsi_full_dst_register *reg,
1849 unsigned index,
1850 unsigned chan_index,
1851 LLVMValueRef indirect_index,
1852 LLVMValueRef value)
1853 {
1854 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1855 struct gallivm_state *gallivm = bld_base->base.gallivm;
1856 LLVMBuilderRef builder = gallivm->builder;
1857 struct lp_build_context *int_bld = &bld_base->int_bld;
1858
1859 assert(dtype == TGSI_TYPE_SIGNED);
1860 assert(LLVMTypeOf(value) == int_bld->vec_type);
1861 value = LLVMBuildBitCast(builder, value, int_bld->vec_type, "");
1862 lp_exec_mask_store(&bld->exec_mask, int_bld, value,
1863 bld->addr[reg->Register.Index][chan_index]);
1864 }
1865
1866 /**
1867 * Register store.
1868 */
1869 static void
emit_store_chan(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned index,unsigned chan_index,LLVMValueRef value)1870 emit_store_chan(
1871 struct lp_build_tgsi_context *bld_base,
1872 const struct tgsi_full_instruction *inst,
1873 unsigned index,
1874 unsigned chan_index,
1875 LLVMValueRef value)
1876 {
1877 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1878 struct gallivm_state *gallivm = bld_base->base.gallivm;
1879 LLVMBuilderRef builder = gallivm->builder;
1880 const struct tgsi_full_dst_register *reg = &inst->Dst[index];
1881 struct lp_build_context *float_bld = &bld_base->base;
1882 LLVMValueRef indirect_index = NULL;
1883 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1884
1885 /*
1886 * Apply saturation.
1887 *
1888 * It is always assumed to be float.
1889 */
1890 if (inst->Instruction.Saturate) {
1891 assert(dtype == TGSI_TYPE_FLOAT ||
1892 dtype == TGSI_TYPE_UNTYPED);
1893 value = LLVMBuildBitCast(builder, value, float_bld->vec_type, "");
1894 value = lp_build_clamp_zero_one_nanzero(float_bld, value);
1895 }
1896
1897 if (reg->Register.Indirect) {
1898 /*
1899 * Currently the mesa/st doesn't generate indirect stores
1900 * to 64-bit values, it normally uses MOV to do indirect stores.
1901 */
1902 assert(!tgsi_type_is_64bit(dtype));
1903 indirect_index = get_indirect_index(bld,
1904 reg->Register.File,
1905 reg->Register.Index,
1906 ®->Indirect,
1907 bld->bld_base.info->file_max[reg->Register.File]);
1908 } else {
1909 assert(reg->Register.Index <=
1910 bld_base->info->file_max[reg->Register.File]);
1911 }
1912
1913 if (DEBUG_EXECUTION) {
1914 emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value);
1915 }
1916
1917 assert(bld_base->emit_store_reg_funcs[reg->Register.File]);
1918 bld_base->emit_store_reg_funcs[reg->Register.File](bld_base,
1919 dtype,
1920 reg,
1921 index,
1922 chan_index,
1923 indirect_index,
1924 value);
1925
1926 (void)dtype;
1927 }
1928
1929 /*
1930 * Called at the beginning of the translation of each TGSI instruction, to
1931 * emit some debug code.
1932 */
1933 static void
emit_debug(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info)1934 emit_debug(
1935 struct lp_build_tgsi_context * bld_base,
1936 const struct tgsi_full_instruction * inst,
1937 const struct tgsi_opcode_info * info)
1938
1939 {
1940 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
1941
1942 if (DEBUG_EXECUTION) {
1943 /*
1944 * Dump the TGSI instruction.
1945 */
1946
1947 struct gallivm_state *gallivm = bld_base->base.gallivm;
1948 char buf[512];
1949 buf[0] = '$';
1950 buf[1] = ' ';
1951 tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2);
1952 lp_build_printf(gallivm, buf);
1953
1954 /* Dump the execution mask.
1955 */
1956 if (bld->exec_mask.has_mask) {
1957 lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask);
1958 }
1959 }
1960 }
1961
1962 static void
emit_store(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,const struct tgsi_opcode_info * info,unsigned index,LLVMValueRef dst[4])1963 emit_store(
1964 struct lp_build_tgsi_context * bld_base,
1965 const struct tgsi_full_instruction * inst,
1966 const struct tgsi_opcode_info * info,
1967 unsigned index,
1968 LLVMValueRef dst[4])
1969
1970 {
1971 enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode, index);
1972
1973 unsigned writemask = inst->Dst[index].Register.WriteMask;
1974 while (writemask) {
1975 unsigned chan_index = u_bit_scan(&writemask);
1976 if (tgsi_type_is_64bit(dtype) && (chan_index == 1 || chan_index == 3))
1977 continue;
1978 emit_store_chan(bld_base, inst, index, chan_index, dst[chan_index]);
1979 }
1980 }
1981
1982 static unsigned
tgsi_to_pipe_tex_target(enum tgsi_texture_type tgsi_target)1983 tgsi_to_pipe_tex_target(enum tgsi_texture_type tgsi_target)
1984 {
1985 switch (tgsi_target) {
1986 case TGSI_TEXTURE_BUFFER:
1987 return PIPE_BUFFER;
1988 case TGSI_TEXTURE_1D:
1989 case TGSI_TEXTURE_SHADOW1D:
1990 return PIPE_TEXTURE_1D;
1991 case TGSI_TEXTURE_2D:
1992 case TGSI_TEXTURE_SHADOW2D:
1993 case TGSI_TEXTURE_2D_MSAA:
1994 return PIPE_TEXTURE_2D;
1995 case TGSI_TEXTURE_3D:
1996 return PIPE_TEXTURE_3D;
1997 case TGSI_TEXTURE_CUBE:
1998 case TGSI_TEXTURE_SHADOWCUBE:
1999 return PIPE_TEXTURE_CUBE;
2000 case TGSI_TEXTURE_RECT:
2001 case TGSI_TEXTURE_SHADOWRECT:
2002 return PIPE_TEXTURE_RECT;
2003 case TGSI_TEXTURE_1D_ARRAY:
2004 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2005 return PIPE_TEXTURE_1D_ARRAY;
2006 case TGSI_TEXTURE_2D_ARRAY:
2007 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2008 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2009 return PIPE_TEXTURE_2D_ARRAY;
2010 case TGSI_TEXTURE_CUBE_ARRAY:
2011 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2012 return PIPE_TEXTURE_CUBE_ARRAY;
2013 default:
2014 assert(0);
2015 return PIPE_BUFFER;
2016 }
2017 }
2018
2019
2020 static enum lp_sampler_lod_property
lp_build_lod_property(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_instruction * inst,unsigned src_op)2021 lp_build_lod_property(
2022 struct lp_build_tgsi_context *bld_base,
2023 const struct tgsi_full_instruction *inst,
2024 unsigned src_op)
2025 {
2026 const struct tgsi_full_src_register *reg = &inst->Src[src_op];
2027 enum lp_sampler_lod_property lod_property;
2028
2029 /*
2030 * Not much we can do here. We could try catching inputs declared
2031 * with constant interpolation but not sure it's worth it - since for
2032 * TEX opcodes as well as FETCH/LD the lod comes from same reg as
2033 * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just
2034 * like the constant/immediate recognition below.
2035 * What seems to be of more value would be to recognize temps holding
2036 * broadcasted scalars but no way we can do it.
2037 * Tried asking llvm but without any success (using LLVMIsConstant
2038 * even though this isn't exactly what we'd need), even as simple as
2039 * IMM[0] UINT32 (0,-1,0,0)
2040 * MOV TEMP[0] IMM[0].yyyy
2041 * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0]
2042 * doesn't work.
2043 * This means there's ZERO chance this will ever catch a scalar lod
2044 * with traditional tex opcodes as well as texel fetches, since the lod
2045 * comes from the same reg as coords (except some test shaders using
2046 * constant coords maybe).
2047 * There's at least hope for sample opcodes as well as size queries.
2048 */
2049 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ ||
2050 reg->Register.File == TGSI_FILE_CONSTANT ||
2051 reg->Register.File == TGSI_FILE_IMMEDIATE) {
2052 lod_property = LP_SAMPLER_LOD_SCALAR;
2053 }
2054 else if (bld_base->info->processor == PIPE_SHADER_FRAGMENT) {
2055 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2056 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2057 }
2058 else {
2059 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2060 }
2061 }
2062 else {
2063 /* never use scalar (per-quad) lod the results are just too wrong. */
2064 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2065 }
2066 return lod_property;
2067 }
2068
2069
2070 /**
2071 * High-level instruction translators.
2072 */
2073
2074 static void
emit_tex(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,LLVMValueRef * texel,unsigned sampler_reg,enum lp_sampler_op_type sampler_op)2075 emit_tex( struct lp_build_tgsi_soa_context *bld,
2076 const struct tgsi_full_instruction *inst,
2077 enum lp_build_tex_modifier modifier,
2078 LLVMValueRef *texel,
2079 unsigned sampler_reg,
2080 enum lp_sampler_op_type sampler_op)
2081 {
2082 unsigned unit = inst->Src[sampler_reg].Register.Index;
2083 LLVMValueRef oow = NULL;
2084 LLVMValueRef lod = NULL;
2085 LLVMValueRef coords[5];
2086 LLVMValueRef offsets[3] = { NULL };
2087 struct lp_derivatives derivs;
2088 struct lp_sampler_params params = { 0 };
2089 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2090 unsigned num_derivs, num_offsets, i;
2091 unsigned shadow_coord = 0;
2092 unsigned layer_coord = 0;
2093 unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;
2094
2095 if (!bld->sampler) {
2096 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2097 for (i = 0; i < 4; i++) {
2098 texel[i] = bld->bld_base.base.undef;
2099 }
2100 return;
2101 }
2102
2103 switch (inst->Texture.Texture) {
2104 case TGSI_TEXTURE_1D_ARRAY:
2105 layer_coord = 1;
2106 FALLTHROUGH;
2107 case TGSI_TEXTURE_1D:
2108 num_offsets = 1;
2109 num_derivs = 1;
2110 break;
2111 case TGSI_TEXTURE_2D_ARRAY:
2112 layer_coord = 2;
2113 FALLTHROUGH;
2114 case TGSI_TEXTURE_2D:
2115 case TGSI_TEXTURE_RECT:
2116 num_offsets = 2;
2117 num_derivs = 2;
2118 break;
2119 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2120 layer_coord = 1;
2121 FALLTHROUGH;
2122 case TGSI_TEXTURE_SHADOW1D:
2123 shadow_coord = 2;
2124 num_offsets = 1;
2125 num_derivs = 1;
2126 break;
2127 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2128 layer_coord = 2;
2129 shadow_coord = 3;
2130 num_offsets = 2;
2131 num_derivs = 2;
2132 break;
2133 case TGSI_TEXTURE_SHADOW2D:
2134 case TGSI_TEXTURE_SHADOWRECT:
2135 shadow_coord = 2;
2136 num_offsets = 2;
2137 num_derivs = 2;
2138 break;
2139 case TGSI_TEXTURE_CUBE:
2140 num_offsets = 2;
2141 num_derivs = 3;
2142 break;
2143 case TGSI_TEXTURE_3D:
2144 num_offsets = 3;
2145 num_derivs = 3;
2146 break;
2147 case TGSI_TEXTURE_SHADOWCUBE:
2148 shadow_coord = 3;
2149 num_offsets = 2;
2150 num_derivs = 3;
2151 break;
2152 case TGSI_TEXTURE_CUBE_ARRAY:
2153 num_offsets = 2;
2154 num_derivs = 3;
2155 layer_coord = 3;
2156 break;
2157 case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
2158 num_offsets = 2;
2159 num_derivs = 3;
2160 layer_coord = 3;
2161 shadow_coord = 4; /* shadow coord special different reg */
2162 break;
2163 case TGSI_TEXTURE_2D_MSAA:
2164 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2165 default:
2166 assert(0);
2167 return;
2168 }
2169
2170 /* Note lod and especially projected are illegal in a LOT of cases */
2171 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2172 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2173 if (inst->Instruction.Opcode == TGSI_OPCODE_TEX_LZ) {
2174 lod = bld->bld_base.base.zero;
2175 } else if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
2176 inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
2177 /* note that shadow cube array with bias/explicit lod does not exist */
2178 lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2179 }
2180 else {
2181 lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2182 }
2183 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2184 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2185 }
2186 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2187 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2188 }
2189 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2190 }
2191
2192 if (sampler_op == LP_SAMPLER_OP_GATHER) {
2193 uint32_t comp_val = inst->Src[sampler_reg].Register.SwizzleX;
2194 sample_key |= (comp_val << LP_SAMPLER_GATHER_COMP_SHIFT);
2195 }
2196 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
2197 oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2198 oow = lp_build_rcp(&bld->bld_base.base, oow);
2199 }
2200
2201 for (i = 0; i < num_derivs; i++) {
2202 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2203 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2204 coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
2205 }
2206 for (i = num_derivs; i < 5; i++) {
2207 coords[i] = bld->bld_base.base.undef;
2208 }
2209
2210 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2211 if (layer_coord) {
2212 if (layer_coord == 3) {
2213 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2214 }
2215 else {
2216 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2217 }
2218 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2219 coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
2220 }
2221 /* Shadow coord occupies always 5th slot. */
2222 if (shadow_coord) {
2223 sample_key |= LP_SAMPLER_SHADOW;
2224 if (shadow_coord == 4) {
2225 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
2226 }
2227 else {
2228 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
2229 }
2230 if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
2231 coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
2232 }
2233
2234 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2235 unsigned dim;
2236 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2237 for (dim = 0; dim < num_derivs; ++dim) {
2238 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
2239 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
2240 }
2241 params.derivs = &derivs;
2242 /*
2243 * could also check all src regs if constant but I doubt such
2244 * cases exist in practice.
2245 */
2246 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2247 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2248 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2249 }
2250 else {
2251 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2252 }
2253 }
2254 else {
2255 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2256 }
2257 }
2258 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2259
2260 /* we don't handle the 4 offset version of tg4 */
2261 if (inst->Texture.NumOffsets == 1) {
2262 unsigned dim;
2263 sample_key |= LP_SAMPLER_OFFSETS;
2264 for (dim = 0; dim < num_offsets; dim++) {
2265 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2266 }
2267 }
2268
2269 params.type = bld->bld_base.base.type;
2270 params.sample_key = sample_key;
2271 params.texture_index = unit;
2272 params.sampler_index = unit;
2273 params.resources_type = bld->resources_type;
2274 params.resources_ptr = bld->resources_ptr;
2275 params.thread_data_type = bld->thread_data_type;
2276 params.thread_data_ptr = bld->thread_data_ptr;
2277 params.coords = coords;
2278 params.offsets = offsets;
2279 params.lod = lod;
2280 params.texel = texel;
2281
2282 bld->sampler->emit_tex_sample(bld->sampler,
2283 bld->bld_base.base.gallivm,
2284 ¶ms);
2285 }
2286
2287 static void
emit_sample(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,enum lp_build_tex_modifier modifier,bool compare,enum lp_sampler_op_type sample_type,LLVMValueRef * texel)2288 emit_sample(struct lp_build_tgsi_soa_context *bld,
2289 const struct tgsi_full_instruction *inst,
2290 enum lp_build_tex_modifier modifier,
2291 bool compare,
2292 enum lp_sampler_op_type sample_type,
2293 LLVMValueRef *texel)
2294 {
2295 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2296 unsigned texture_unit, sampler_unit;
2297 LLVMValueRef lod = NULL;
2298 LLVMValueRef coords[5];
2299 LLVMValueRef offsets[3] = { NULL };
2300 struct lp_derivatives derivs;
2301 struct lp_sampler_params params = { 0 };
2302 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2303
2304 unsigned num_offsets, num_derivs, i;
2305 unsigned layer_coord = 0;
2306 unsigned sample_key = sample_type << LP_SAMPLER_OP_TYPE_SHIFT;
2307
2308 if (!bld->sampler) {
2309 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2310 for (i = 0; i < 4; i++) {
2311 texel[i] = bld->bld_base.base.undef;
2312 }
2313 return;
2314 }
2315
2316 /*
2317 * unlike old-style tex opcodes the texture/sampler indices
2318 * always come from src1 and src2 respectively.
2319 */
2320 texture_unit = inst->Src[1].Register.Index;
2321 sampler_unit = inst->Src[2].Register.Index;
2322
2323 /*
2324 * Note inst->Texture.Texture will contain the number of offsets,
2325 * however the target information is NOT there and comes from the
2326 * declared sampler views instead.
2327 */
2328 switch (bld->sv[texture_unit].Resource) {
2329 case TGSI_TEXTURE_1D:
2330 num_offsets = 1;
2331 num_derivs = 1;
2332 break;
2333 case TGSI_TEXTURE_1D_ARRAY:
2334 layer_coord = 1;
2335 num_offsets = 1;
2336 num_derivs = 1;
2337 break;
2338 case TGSI_TEXTURE_2D:
2339 case TGSI_TEXTURE_RECT:
2340 num_offsets = 2;
2341 num_derivs = 2;
2342 break;
2343 case TGSI_TEXTURE_2D_ARRAY:
2344 layer_coord = 2;
2345 num_offsets = 2;
2346 num_derivs = 2;
2347 break;
2348 case TGSI_TEXTURE_CUBE:
2349 num_offsets = 2;
2350 num_derivs = 3;
2351 break;
2352 case TGSI_TEXTURE_3D:
2353 num_offsets = 3;
2354 num_derivs = 3;
2355 break;
2356 case TGSI_TEXTURE_CUBE_ARRAY:
2357 layer_coord = 3;
2358 num_offsets = 2;
2359 num_derivs = 3;
2360 break;
2361 default:
2362 assert(0);
2363 return;
2364 }
2365
2366 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
2367 modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2368 lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2369 if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
2370 sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
2371 }
2372 else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
2373 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2374 }
2375 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2376 }
2377 else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
2378 /* XXX might be better to explicitly pass the level zero information */
2379 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2380 lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
2381 }
2382
2383 for (i = 0; i < num_derivs; i++) {
2384 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2385 }
2386 for (i = num_derivs; i < 5; i++) {
2387 coords[i] = bld->bld_base.base.undef;
2388 }
2389
2390 /* Layer coord always goes into 3rd slot, except for cube map arrays */
2391 if (layer_coord) {
2392 if (layer_coord == 3)
2393 coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2394 else
2395 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2396 }
2397 /* Shadow coord occupies always 5th slot. */
2398 if (compare) {
2399 sample_key |= LP_SAMPLER_SHADOW;
2400 coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
2401 }
2402
2403 if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
2404 unsigned dim;
2405 sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
2406 for (dim = 0; dim < num_derivs; ++dim) {
2407 derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
2408 derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
2409 }
2410 params.derivs = &derivs;
2411 /*
2412 * could also check all src regs if constant but I doubt such
2413 * cases exist in practice.
2414 */
2415 if (bld->bld_base.info->processor == PIPE_SHADER_FRAGMENT) {
2416 if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD) {
2417 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2418 }
2419 else {
2420 lod_property = LP_SAMPLER_LOD_PER_QUAD;
2421 }
2422 }
2423 else {
2424 lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
2425 }
2426 }
2427
2428 /* some advanced gather instructions (txgo) would require 4 offsets */
2429 if (inst->Texture.NumOffsets == 1) {
2430 unsigned dim;
2431 sample_key |= LP_SAMPLER_OFFSETS;
2432 for (dim = 0; dim < num_offsets; dim++) {
2433 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2434 }
2435 }
2436 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2437
2438 params.type = bld->bld_base.base.type;
2439 params.sample_key = sample_key;
2440 params.texture_index = texture_unit;
2441 params.sampler_index = sampler_unit;
2442 params.resources_type = bld->resources_type;
2443 params.resources_ptr = bld->resources_ptr;
2444 params.thread_data_type = bld->thread_data_type;
2445 params.thread_data_ptr = bld->thread_data_ptr;
2446 params.coords = coords;
2447 params.offsets = offsets;
2448 params.lod = lod;
2449 params.texel = texel;
2450
2451 bld->sampler->emit_tex_sample(bld->sampler,
2452 bld->bld_base.base.gallivm,
2453 ¶ms);
2454
2455 if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2456 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2457 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2458 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W) {
2459 unsigned char swizzles[4];
2460 swizzles[0] = inst->Src[1].Register.SwizzleX;
2461 swizzles[1] = inst->Src[1].Register.SwizzleY;
2462 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2463 swizzles[3] = inst->Src[1].Register.SwizzleW;
2464
2465 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2466 }
2467 }
2468
2469 static void
emit_fetch_texels(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * texel,bool is_samplei)2470 emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
2471 const struct tgsi_full_instruction *inst,
2472 LLVMValueRef *texel,
2473 bool is_samplei)
2474 {
2475 unsigned unit, target;
2476 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
2477 LLVMValueRef explicit_lod = NULL;
2478 LLVMValueRef coords[5];
2479 LLVMValueRef offsets[3] = { NULL };
2480 LLVMValueRef ms_index = NULL;
2481 struct lp_sampler_params params = { 0 };
2482 enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
2483 unsigned dims, i;
2484 unsigned layer_coord = 0;
2485 unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;
2486
2487 if (!bld->sampler) {
2488 _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
2489 for (i = 0; i < 4; i++) {
2490 texel[i] = coord_undef;
2491 }
2492 return;
2493 }
2494
2495 unit = inst->Src[1].Register.Index;
2496
2497 if (is_samplei) {
2498 target = bld->sv[unit].Resource;
2499 }
2500 else {
2501 target = inst->Texture.Texture;
2502 }
2503
2504 switch (target) {
2505 case TGSI_TEXTURE_1D:
2506 case TGSI_TEXTURE_BUFFER:
2507 dims = 1;
2508 break;
2509 case TGSI_TEXTURE_1D_ARRAY:
2510 layer_coord = 1;
2511 dims = 1;
2512 break;
2513 case TGSI_TEXTURE_2D:
2514 case TGSI_TEXTURE_RECT:
2515 case TGSI_TEXTURE_2D_MSAA:
2516 dims = 2;
2517 break;
2518 case TGSI_TEXTURE_2D_ARRAY:
2519 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2520 layer_coord = 2;
2521 dims = 2;
2522 break;
2523 case TGSI_TEXTURE_3D:
2524 dims = 3;
2525 break;
2526 default:
2527 assert(0);
2528 return;
2529 }
2530
2531 /* always have lod except for buffers and msaa targets ? */
2532 if (target != TGSI_TEXTURE_BUFFER &&
2533 target != TGSI_TEXTURE_2D_MSAA &&
2534 target != TGSI_TEXTURE_2D_ARRAY_MSAA &&
2535 inst->Instruction.Opcode != TGSI_OPCODE_TXF_LZ) {
2536 sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
2537 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2538 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2539 }
2540
2541 if (target == TGSI_TEXTURE_2D_MSAA ||
2542 target == TGSI_TEXTURE_2D_ARRAY_MSAA) {
2543 sample_key |= LP_SAMPLER_FETCH_MS;
2544 ms_index = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
2545 }
2546
2547 /*
2548 * XXX: for real msaa support, the w component (or src2.x for sample_i_ms)
2549 * would be the sample index.
2550 */
2551
2552 for (i = 0; i < dims; i++) {
2553 coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
2554 }
2555 /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
2556 for (i = dims; i < 5; i++) {
2557 coords[i] = coord_undef;
2558 }
2559 if (layer_coord)
2560 coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
2561
2562 if (inst->Texture.NumOffsets == 1) {
2563 unsigned dim;
2564 sample_key |= LP_SAMPLER_OFFSETS;
2565 for (dim = 0; dim < dims; dim++) {
2566 offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
2567 }
2568 }
2569 sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;
2570
2571 params.type = bld->bld_base.base.type;
2572 params.sample_key = sample_key;
2573 params.texture_index = unit;
2574 /*
2575 * sampler not actually used, set to 0 so it won't exceed PIPE_MAX_SAMPLERS
2576 * and trigger some assertions with d3d10 where the sampler view number
2577 * can exceed this.
2578 */
2579 params.sampler_index = 0;
2580 params.resources_type = bld->resources_type;
2581 params.resources_ptr = bld->resources_ptr;
2582 params.thread_data_type = bld->thread_data_type;
2583 params.thread_data_ptr = bld->thread_data_ptr;
2584 params.coords = coords;
2585 params.offsets = offsets;
2586 params.derivs = NULL;
2587 params.lod = explicit_lod;
2588 params.texel = texel;
2589 params.ms_index = ms_index;
2590
2591 bld->sampler->emit_tex_sample(bld->sampler,
2592 bld->bld_base.base.gallivm,
2593 ¶ms);
2594
2595 if (is_samplei &&
2596 (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_X ||
2597 inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_Y ||
2598 inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_Z ||
2599 inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_W)) {
2600 unsigned char swizzles[4];
2601 swizzles[0] = inst->Src[1].Register.SwizzleX;
2602 swizzles[1] = inst->Src[1].Register.SwizzleY;
2603 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2604 swizzles[3] = inst->Src[1].Register.SwizzleW;
2605
2606 lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
2607 }
2608 }
2609
2610 static void
emit_size_query(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,LLVMValueRef * sizes_out,bool is_sviewinfo)2611 emit_size_query( struct lp_build_tgsi_soa_context *bld,
2612 const struct tgsi_full_instruction *inst,
2613 LLVMValueRef *sizes_out,
2614 bool is_sviewinfo)
2615 {
2616 LLVMValueRef explicit_lod;
2617 enum lp_sampler_lod_property lod_property;
2618 unsigned has_lod;
2619 unsigned i;
2620 unsigned unit = inst->Src[1].Register.Index;
2621 enum tgsi_texture_type target;
2622 enum pipe_texture_target pipe_target;
2623 struct lp_sampler_size_query_params params = { 0 };
2624
2625 if (is_sviewinfo) {
2626 target = bld->sv[unit].Resource;
2627 }
2628 else {
2629 target = inst->Texture.Texture;
2630 }
2631 switch (target) {
2632 case TGSI_TEXTURE_BUFFER:
2633 case TGSI_TEXTURE_RECT:
2634 case TGSI_TEXTURE_SHADOWRECT:
2635 case TGSI_TEXTURE_2D_MSAA:
2636 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2637 has_lod = 0;
2638 break;
2639 default:
2640 has_lod = 1;
2641 break;
2642 }
2643
2644 if (!bld->sampler) {
2645 _debug_printf("warning: found texture query instruction but no sampler generator supplied\n");
2646 for (i = 0; i < 4; i++)
2647 sizes_out[i] = bld->bld_base.int_bld.undef;
2648 return;
2649 }
2650
2651 if (has_lod) {
2652 explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0);
2653 lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
2654 }
2655 else {
2656 explicit_lod = NULL;
2657 lod_property = LP_SAMPLER_LOD_SCALAR;
2658 }
2659
2660
2661 pipe_target = tgsi_to_pipe_tex_target(target);
2662
2663 params.int_type = bld->bld_base.int_bld.type;
2664 params.texture_unit = unit;
2665 params.texture_unit_offset = NULL;
2666 params.target = pipe_target;
2667 params.resources_type = bld->resources_type;
2668 params.resources_ptr = bld->resources_ptr;
2669 params.is_sviewinfo = true;
2670 params.lod_property = lod_property;
2671 params.explicit_lod = explicit_lod;
2672 params.sizes_out = sizes_out;
2673 params.samples_only = false;
2674
2675 bld->sampler->emit_size_query(bld->sampler,
2676 bld->bld_base.base.gallivm,
2677 ¶ms);
2678 }
2679
2680 static bool
near_end_of_shader(struct lp_build_tgsi_soa_context * bld,int pc)2681 near_end_of_shader(struct lp_build_tgsi_soa_context *bld,
2682 int pc)
2683 {
2684 unsigned i;
2685
2686 for (i = 0; i < 5; i++) {
2687 enum tgsi_opcode opcode;
2688
2689 if (pc + i >= bld->bld_base.info->num_instructions)
2690 return true;
2691
2692 opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode;
2693
2694 if (opcode == TGSI_OPCODE_END)
2695 return true;
2696
2697 if (opcode == TGSI_OPCODE_TEX ||
2698 opcode == TGSI_OPCODE_TXP ||
2699 opcode == TGSI_OPCODE_TXD ||
2700 opcode == TGSI_OPCODE_TXB ||
2701 opcode == TGSI_OPCODE_TXL ||
2702 opcode == TGSI_OPCODE_TXF ||
2703 opcode == TGSI_OPCODE_TXQ ||
2704 opcode == TGSI_OPCODE_TEX2 ||
2705 opcode == TGSI_OPCODE_TXB2 ||
2706 opcode == TGSI_OPCODE_TXL2 ||
2707 opcode == TGSI_OPCODE_SAMPLE ||
2708 opcode == TGSI_OPCODE_SAMPLE_B ||
2709 opcode == TGSI_OPCODE_SAMPLE_C ||
2710 opcode == TGSI_OPCODE_SAMPLE_C_LZ ||
2711 opcode == TGSI_OPCODE_SAMPLE_D ||
2712 opcode == TGSI_OPCODE_SAMPLE_I ||
2713 opcode == TGSI_OPCODE_SAMPLE_I_MS ||
2714 opcode == TGSI_OPCODE_SAMPLE_L ||
2715 opcode == TGSI_OPCODE_SVIEWINFO ||
2716 opcode == TGSI_OPCODE_CAL ||
2717 opcode == TGSI_OPCODE_IF ||
2718 opcode == TGSI_OPCODE_UIF ||
2719 opcode == TGSI_OPCODE_BGNLOOP ||
2720 opcode == TGSI_OPCODE_SWITCH)
2721 return false;
2722 }
2723
2724 return true;
2725 }
2726
2727
2728
2729 /**
2730 * Kill fragment if any of the src register values are negative.
2731 */
2732 static void
emit_kill_if(struct lp_build_tgsi_soa_context * bld,const struct tgsi_full_instruction * inst,int pc)2733 emit_kill_if(
2734 struct lp_build_tgsi_soa_context *bld,
2735 const struct tgsi_full_instruction *inst,
2736 int pc)
2737 {
2738 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2739 const struct tgsi_full_src_register *reg = &inst->Src[0];
2740 LLVMValueRef terms[TGSI_NUM_CHANNELS];
2741 LLVMValueRef mask;
2742 unsigned chan_index;
2743
2744 memset(&terms, 0, sizeof terms);
2745
2746 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2747 unsigned swizzle;
2748
2749 /* Unswizzle channel */
2750 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
2751
2752 /* Check if the component has not been already tested. */
2753 assert(swizzle < TGSI_NUM_CHANNELS);
2754 if( !terms[swizzle] )
2755 /* TODO: change the comparison operator instead of setting the sign */
2756 terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index );
2757 }
2758
2759 mask = NULL;
2760 TGSI_FOR_EACH_CHANNEL( chan_index ) {
2761 if(terms[chan_index]) {
2762 LLVMValueRef chan_mask;
2763
2764 /*
2765 * If term < 0 then mask = 0 else mask = ~0.
2766 */
2767 chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero);
2768
2769 if(mask)
2770 mask = LLVMBuildAnd(builder, mask, chan_mask, "");
2771 else
2772 mask = chan_mask;
2773 }
2774 }
2775
2776 if (bld->exec_mask.has_mask) {
2777 LLVMValueRef invmask;
2778 invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2779 mask = LLVMBuildOr(builder, mask, invmask, "");
2780 }
2781
2782 lp_build_mask_update(bld->mask, mask);
2783 if (!near_end_of_shader(bld, pc))
2784 lp_build_mask_check(bld->mask);
2785 }
2786
2787
2788 /**
2789 * Unconditional fragment kill.
2790 * The only predication is the execution mask which will apply if
2791 * we're inside a loop or conditional.
2792 */
2793 static void
emit_kill(struct lp_build_tgsi_soa_context * bld,int pc)2794 emit_kill(struct lp_build_tgsi_soa_context *bld,
2795 int pc)
2796 {
2797 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2798 LLVMValueRef mask;
2799
2800 /* For those channels which are "alive", disable fragment shader
2801 * execution.
2802 */
2803 if (bld->exec_mask.has_mask) {
2804 mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2805 }
2806 else {
2807 LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type);
2808 mask = zero;
2809 }
2810
2811 lp_build_mask_update(bld->mask, mask);
2812
2813 if (!near_end_of_shader(bld, pc))
2814 lp_build_mask_check(bld->mask);
2815 }
2816
2817
2818 /**
2819 * Emit code which will dump the value of all the temporary registers
2820 * to stdout.
2821 */
2822 static void
emit_dump_file(struct lp_build_tgsi_soa_context * bld,unsigned file)2823 emit_dump_file(struct lp_build_tgsi_soa_context *bld,
2824 unsigned file)
2825 {
2826 const struct tgsi_shader_info *info = bld->bld_base.info;
2827 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2828 LLVMBuilderRef builder = gallivm->builder;
2829 LLVMValueRef reg_ptr;
2830 int index;
2831 int max_index = info->file_max[file];
2832
2833 /*
2834 * Some register files, particularly constants, can be very large,
2835 * and dumping everything could make this unusably slow.
2836 */
2837 max_index = MIN2(max_index, 32);
2838
2839 for (index = 0; index <= max_index; index++) {
2840 LLVMValueRef res;
2841 unsigned mask;
2842 int chan;
2843
2844 if (index < 8 * sizeof(unsigned) &&
2845 (info->file_mask[file] & (1u << index)) == 0) {
2846 /* This was not declared.*/
2847 continue;
2848 }
2849
2850 if (file == TGSI_FILE_INPUT) {
2851 mask = info->input_usage_mask[index];
2852 } else {
2853 mask = TGSI_WRITEMASK_XYZW;
2854 }
2855
2856 for (chan = 0; chan < 4; chan++) {
2857 if ((mask & (1 << chan)) == 0) {
2858 /* This channel is not used.*/
2859 continue;
2860 }
2861
2862 if (file == TGSI_FILE_CONSTANT) {
2863 struct tgsi_full_src_register reg;
2864 memset(®, 0, sizeof reg);
2865 reg.Register.File = file;
2866 reg.Register.Index = index;
2867 reg.Register.SwizzleX = 0;
2868 reg.Register.SwizzleY = 1;
2869 reg.Register.SwizzleZ = 2;
2870 reg.Register.SwizzleW = 3;
2871
2872 res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, ®, TGSI_TYPE_FLOAT, chan);
2873 if (!res) {
2874 continue;
2875 }
2876 } else if (file == TGSI_FILE_INPUT) {
2877 res = bld->inputs[index][chan];
2878 if (!res) {
2879 continue;
2880 }
2881 } else if (file == TGSI_FILE_TEMPORARY) {
2882 reg_ptr = lp_get_temp_ptr_soa(bld, index, chan);
2883 assert(reg_ptr);
2884 res = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, reg_ptr, "");
2885 } else if (file == TGSI_FILE_OUTPUT) {
2886 reg_ptr = lp_get_output_ptr(bld, index, chan);
2887 assert(reg_ptr);
2888 res = LLVMBuildLoad2(builder, bld->bld_base.base.vec_type, reg_ptr, "");
2889 } else {
2890 assert(0);
2891 continue;
2892 }
2893
2894 emit_dump_reg(gallivm, file, index, chan, res);
2895 }
2896 }
2897 }
2898
2899
2900
/**
 * Process a TGSI declaration: allocate per-register storage or record
 * per-unit metadata, depending on the register file being declared.
 *
 * \param bld_base  the TGSI translation context (SoA)
 * \param decl      the full declaration token (file + index range)
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* One alloca per channel, but only when temps are NOT accessed
       * indirectly (the indirect case uses a single array elsewhere). */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      /* Same scheme as temporaries: per-channel allocas unless outputs
       * are addressed indirectly. */
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
      {
         /*
          * We could trivially fetch the per-buffer pointer when fetching the
          * constant, relying on llvm to figure out it's always the same pointer
          * anyway. However, doing so results in a huge (more than factor of 10)
          * slowdown in llvm compilation times for some (but not all) shaders
          * (more specifically, the IR optimization spends way more time in
          * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
          */
         unsigned idx2D = decl->Dim.Index2D;
         LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
         assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
         /* Cache base pointer and element count for this constant buffer;
          * the base is cast to float* since constants are fetched as floats. */
         bld->consts[idx2D] = lp_llvm_buffer_base(gallivm, bld->consts_ptr,
                                                  index2D, LP_MAX_TGSI_CONST_BUFFERS);
         bld->consts[idx2D] = LLVMBuildBitCast(gallivm->builder, bld->consts[idx2D], LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0), "");
         bld->consts_sizes[idx2D] = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr,
                                                                index2D, LP_MAX_TGSI_CONST_BUFFERS);
      }
      break;
   case TGSI_FILE_BUFFER:
      {
         /* NOTE(review): this inner `idx` shadows the outer loop variable
          * of the same name; harmless here but worth renaming. */
         unsigned idx = decl->Range.First;
         LLVMValueRef index = lp_build_const_int32(gallivm, idx);
         assert(idx < LP_MAX_TGSI_SHADER_BUFFERS);
         /* Cache SSBO base pointer and size for later LOAD/STORE ops. */
         bld->ssbos[idx] =
            lp_llvm_buffer_base(gallivm, bld->ssbo_ptr,
                                index, LP_MAX_TGSI_SHADER_BUFFERS);
         bld->ssbo_sizes[idx] =
            lp_llvm_buffer_num_elements(gallivm, bld->ssbo_ptr,
                                        index, LP_MAX_TGSI_SHADER_BUFFERS);

      }
      break;
   case TGSI_FILE_MEMORY:
      break;
   default:
      /* don't need to declare other vars */
      break;
   }
}
3002
3003
lp_emit_immediate_soa(struct lp_build_tgsi_context * bld_base,const struct tgsi_full_immediate * imm)3004 void lp_emit_immediate_soa(
3005 struct lp_build_tgsi_context *bld_base,
3006 const struct tgsi_full_immediate *imm)
3007 {
3008 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3009 struct gallivm_state * gallivm = bld_base->base.gallivm;
3010 LLVMValueRef imms[4];
3011 unsigned i;
3012 const unsigned size = imm->Immediate.NrTokens - 1;
3013 assert(size <= 4);
3014 switch (imm->Immediate.DataType) {
3015 case TGSI_IMM_FLOAT32:
3016 for( i = 0; i < size; ++i )
3017 imms[i] =
3018 lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float);
3019
3020 break;
3021 case TGSI_IMM_FLOAT64:
3022 case TGSI_IMM_UINT64:
3023 case TGSI_IMM_INT64:
3024 case TGSI_IMM_UINT32:
3025 for( i = 0; i < size; ++i ) {
3026 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint);
3027 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3028 }
3029
3030 break;
3031 case TGSI_IMM_INT32:
3032 for( i = 0; i < size; ++i ) {
3033 LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int);
3034 imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type);
3035 }
3036
3037 break;
3038 }
3039 for( i = size; i < 4; ++i )
3040 imms[i] = bld_base->base.undef;
3041
3042 if (bld->use_immediates_array) {
3043 unsigned index = bld->num_immediates;
3044 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3045 LLVMBuilderRef builder = gallivm->builder;
3046 LLVMValueRef gep[2];
3047 gep[0] = lp_build_const_int32(gallivm, 0);
3048
3049 assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE));
3050 for (i = 0; i < 4; ++i ) {
3051 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3052 LLVMValueRef imm_ptr = LLVMBuildGEP2(builder,
3053 bld->bld_base.base.vec_type,
3054 bld->imms_array, gep, 2, "");
3055 LLVMBuildStore(builder, imms[i], imm_ptr);
3056 }
3057 } else {
3058 /* simply copy the immediate values into the next immediates[] slot */
3059 unsigned i;
3060 assert(imm->Immediate.NrTokens - 1 <= 4);
3061 assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES);
3062
3063 for(i = 0; i < 4; ++i )
3064 bld->immediates[bld->num_immediates][i] = imms[i];
3065
3066 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
3067 unsigned index = bld->num_immediates;
3068 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
3069 LLVMBuilderRef builder = gallivm->builder;
3070 LLVMValueRef gep[2];
3071 gep[0] = lp_build_const_int32(gallivm, 0);
3072 for (i = 0; i < 4; ++i ) {
3073 gep[1] = lp_build_const_int32(gallivm, index * 4 + i);
3074 LLVMValueRef imm_ptr = LLVMBuildGEP2(builder,
3075 bld->bld_base.base.vec_type,
3076 bld->imms_array, gep, 2, "");
3077 LLVMBuildStore(builder,
3078 bld->immediates[index][i],
3079 imm_ptr);
3080 }
3081 }
3082 }
3083
3084 bld->num_immediates++;
3085 }
3086
3087 static void
ddx_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3088 ddx_emit(
3089 const struct lp_build_tgsi_action * action,
3090 struct lp_build_tgsi_context * bld_base,
3091 struct lp_build_emit_data * emit_data)
3092 {
3093 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3094
3095 emit_fetch_deriv(bld, emit_data->args[0], NULL,
3096 &emit_data->output[emit_data->chan], NULL);
3097 }
3098
3099 static void
ddy_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3100 ddy_emit(
3101 const struct lp_build_tgsi_action * action,
3102 struct lp_build_tgsi_context * bld_base,
3103 struct lp_build_emit_data * emit_data)
3104 {
3105 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3106
3107 emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL,
3108 &emit_data->output[emit_data->chan]);
3109 }
3110
3111 static void
kill_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3112 kill_emit(
3113 const struct lp_build_tgsi_action * action,
3114 struct lp_build_tgsi_context * bld_base,
3115 struct lp_build_emit_data * emit_data)
3116 {
3117 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3118
3119 emit_kill(bld, bld_base->pc - 1);
3120 }
3121
3122 static void
kill_if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3123 kill_if_emit(
3124 const struct lp_build_tgsi_action * action,
3125 struct lp_build_tgsi_context * bld_base,
3126 struct lp_build_emit_data * emit_data)
3127 {
3128 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3129
3130 emit_kill_if(bld, emit_data->inst, bld_base->pc - 1);
3131 }
3132
3133 static void
tex_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3134 tex_emit(
3135 const struct lp_build_tgsi_action * action,
3136 struct lp_build_tgsi_context * bld_base,
3137 struct lp_build_emit_data * emit_data)
3138 {
3139 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3140
3141 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3142 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3143 }
3144
3145 static void
tex2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3146 tex2_emit(
3147 const struct lp_build_tgsi_action * action,
3148 struct lp_build_tgsi_context * bld_base,
3149 struct lp_build_emit_data * emit_data)
3150 {
3151 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3152
3153 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3154 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3155 }
3156
3157 static void
txb_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3158 txb_emit(
3159 const struct lp_build_tgsi_action * action,
3160 struct lp_build_tgsi_context * bld_base,
3161 struct lp_build_emit_data * emit_data)
3162 {
3163 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3164
3165 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3166 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3167 }
3168
3169 static void
txb2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3170 txb2_emit(
3171 const struct lp_build_tgsi_action * action,
3172 struct lp_build_tgsi_context * bld_base,
3173 struct lp_build_emit_data * emit_data)
3174 {
3175 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3176
3177 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3178 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3179 }
3180
3181 static void
txd_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3182 txd_emit(
3183 const struct lp_build_tgsi_action * action,
3184 struct lp_build_tgsi_context * bld_base,
3185 struct lp_build_emit_data * emit_data)
3186 {
3187 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3188
3189 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3190 emit_data->output, 3, LP_SAMPLER_OP_TEXTURE);
3191 }
3192
3193 static void
txl_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3194 txl_emit(
3195 const struct lp_build_tgsi_action * action,
3196 struct lp_build_tgsi_context * bld_base,
3197 struct lp_build_emit_data * emit_data)
3198 {
3199 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3200
3201 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3202 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3203 }
3204
3205 static void
txl2_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3206 txl2_emit(
3207 const struct lp_build_tgsi_action * action,
3208 struct lp_build_tgsi_context * bld_base,
3209 struct lp_build_emit_data * emit_data)
3210 {
3211 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3212
3213 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3214 emit_data->output, 2, LP_SAMPLER_OP_TEXTURE);
3215 }
3216
3217 static void
txp_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3218 txp_emit(
3219 const struct lp_build_tgsi_action * action,
3220 struct lp_build_tgsi_context * bld_base,
3221 struct lp_build_emit_data * emit_data)
3222 {
3223 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3224
3225 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED,
3226 emit_data->output, 1, LP_SAMPLER_OP_TEXTURE);
3227 }
3228
3229 static void
tg4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3230 tg4_emit(
3231 const struct lp_build_tgsi_action * action,
3232 struct lp_build_tgsi_context * bld_base,
3233 struct lp_build_emit_data * emit_data)
3234 {
3235 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3236
3237 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3238 emit_data->output, 2, LP_SAMPLER_OP_GATHER);
3239 }
3240
3241 static void
lodq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3242 lodq_emit(
3243 const struct lp_build_tgsi_action * action,
3244 struct lp_build_tgsi_context * bld_base,
3245 struct lp_build_emit_data * emit_data)
3246 {
3247 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3248
3249 emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3250 emit_data->output, 1, LP_SAMPLER_OP_LODQ);
3251 }
3252
3253 static void
txq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3254 txq_emit(
3255 const struct lp_build_tgsi_action * action,
3256 struct lp_build_tgsi_context * bld_base,
3257 struct lp_build_emit_data * emit_data)
3258 {
3259 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3260
3261 emit_size_query(bld, emit_data->inst, emit_data->output, false);
3262 }
3263
3264 static void
txf_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3265 txf_emit(
3266 const struct lp_build_tgsi_action * action,
3267 struct lp_build_tgsi_context * bld_base,
3268 struct lp_build_emit_data * emit_data)
3269 {
3270 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3271
3272 emit_fetch_texels(bld, emit_data->inst, emit_data->output, false);
3273 }
3274
3275 static void
sample_i_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3276 sample_i_emit(
3277 const struct lp_build_tgsi_action * action,
3278 struct lp_build_tgsi_context * bld_base,
3279 struct lp_build_emit_data * emit_data)
3280 {
3281 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3282
3283 emit_fetch_texels(bld, emit_data->inst, emit_data->output, true);
3284 }
3285
3286 static void
sample_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3287 sample_emit(
3288 const struct lp_build_tgsi_action * action,
3289 struct lp_build_tgsi_context * bld_base,
3290 struct lp_build_emit_data * emit_data)
3291 {
3292 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3293
3294 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3295 false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3296 }
3297
3298 static void
sample_b_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3299 sample_b_emit(
3300 const struct lp_build_tgsi_action * action,
3301 struct lp_build_tgsi_context * bld_base,
3302 struct lp_build_emit_data * emit_data)
3303 {
3304 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3305
3306 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS,
3307 false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3308 }
3309
3310 static void
sample_c_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3311 sample_c_emit(
3312 const struct lp_build_tgsi_action * action,
3313 struct lp_build_tgsi_context * bld_base,
3314 struct lp_build_emit_data * emit_data)
3315 {
3316 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3317
3318 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3319 true, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3320 }
3321
3322 static void
sample_c_lz_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3323 sample_c_lz_emit(
3324 const struct lp_build_tgsi_action * action,
3325 struct lp_build_tgsi_context * bld_base,
3326 struct lp_build_emit_data * emit_data)
3327 {
3328 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3329
3330 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO,
3331 true, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3332 }
3333
3334 static void
sample_d_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3335 sample_d_emit(
3336 const struct lp_build_tgsi_action * action,
3337 struct lp_build_tgsi_context * bld_base,
3338 struct lp_build_emit_data * emit_data)
3339 {
3340 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3341
3342 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV,
3343 false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3344 }
3345
3346 static void
sample_l_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3347 sample_l_emit(
3348 const struct lp_build_tgsi_action * action,
3349 struct lp_build_tgsi_context * bld_base,
3350 struct lp_build_emit_data * emit_data)
3351 {
3352 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3353
3354 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD,
3355 false, LP_SAMPLER_OP_TEXTURE, emit_data->output);
3356 }
3357
3358 static void
gather4_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3359 gather4_emit(
3360 const struct lp_build_tgsi_action * action,
3361 struct lp_build_tgsi_context * bld_base,
3362 struct lp_build_emit_data * emit_data)
3363 {
3364 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3365
3366 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3367 false, LP_SAMPLER_OP_GATHER, emit_data->output);
3368 }
3369
3370 static void
sviewinfo_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3371 sviewinfo_emit(
3372 const struct lp_build_tgsi_action * action,
3373 struct lp_build_tgsi_context * bld_base,
3374 struct lp_build_emit_data * emit_data)
3375 {
3376 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3377
3378 emit_size_query(bld, emit_data->inst, emit_data->output, true);
3379 }
3380
3381 static void
lod_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3382 lod_emit(
3383 const struct lp_build_tgsi_action * action,
3384 struct lp_build_tgsi_context * bld_base,
3385 struct lp_build_emit_data * emit_data)
3386 {
3387 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
3388
3389 emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE,
3390 false, LP_SAMPLER_OP_LODQ, emit_data->output);
3391 }
3392
3393 static void
target_to_dims_layer(enum tgsi_texture_type target,unsigned * dims,unsigned * layer_coord)3394 target_to_dims_layer(enum tgsi_texture_type target,
3395 unsigned *dims,
3396 unsigned *layer_coord)
3397 {
3398 *layer_coord = 0;
3399 switch (target) {
3400 case TGSI_TEXTURE_1D:
3401 case TGSI_TEXTURE_BUFFER:
3402 *dims = 1;
3403 break;
3404 case TGSI_TEXTURE_1D_ARRAY:
3405 *layer_coord = 1;
3406 *dims = 1;
3407 break;
3408 case TGSI_TEXTURE_2D:
3409 case TGSI_TEXTURE_RECT:
3410 *dims = 2;
3411 break;
3412 case TGSI_TEXTURE_2D_ARRAY:
3413 *layer_coord = 2;
3414 *dims = 2;
3415 break;
3416 case TGSI_TEXTURE_3D:
3417 case TGSI_TEXTURE_CUBE:
3418 case TGSI_TEXTURE_CUBE_ARRAY:
3419 *dims = 3;
3420 break;
3421 default:
3422 assert(0);
3423 *dims = 0;
3424 return;
3425 }
3426 }
3427
3428 static void
img_load_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3429 img_load_emit(
3430 const struct lp_build_tgsi_action * action,
3431 struct lp_build_tgsi_context * bld_base,
3432 struct lp_build_emit_data * emit_data)
3433 {
3434 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3435 struct lp_img_params params = { 0 };
3436 LLVMValueRef coords[5];
3437 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3438 unsigned dims;
3439 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3440 unsigned layer_coord;
3441
3442 target_to_dims_layer(target, &dims, &layer_coord);
3443
3444 for (unsigned i = 0; i < dims; i++) {
3445 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3446 }
3447 for (unsigned i = dims; i < 5; i++) {
3448 coords[i] = coord_undef;
3449 }
3450 if (layer_coord)
3451 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3452
3453 params.type = bld->bld_base.base.type;
3454 params.resources_type = bld->resources_type;
3455 params.resources_ptr = bld->resources_ptr;
3456 params.thread_data_type = bld->thread_data_type;
3457 params.thread_data_ptr = bld->thread_data_ptr;
3458 params.coords = coords;
3459 params.outdata = emit_data->output;
3460 params.target = tgsi_to_pipe_tex_target(target);
3461 params.image_index = emit_data->inst->Src[0].Register.Index;
3462 params.img_op = LP_IMG_LOAD;
3463 bld->image->emit_op(bld->image,
3464 bld->bld_base.base.gallivm,
3465 ¶ms);
3466 }
3467
/**
 * Emit a TGSI LOAD: dispatch on the resource file of Src[0] —
 * images go through img_load_emit, constant buffers through a bounds-
 * checked gather, and SSBOs / shared memory through a per-lane
 * scalar-load loop predicated on the execution mask.
 */
static void
load_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER ||
          bufreg->Register.File == TGSI_FILE_IMAGE ||
          bufreg->Register.File == TGSI_FILE_MEMORY ||
          bufreg->Register.File == TGSI_FILE_CONSTBUF);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_load_emit(action, bld_base, emit_data);
   } else if (bufreg->Register.File == TGSI_FILE_CONSTBUF) {
      LLVMValueRef consts_ptr = bld->consts[buf];
      LLVMValueRef num_consts = bld->consts_sizes[buf];

      LLVMValueRef indirect_index;
      LLVMValueRef overflow_mask;

      /* Fetched index >> 4: presumably converts a byte offset into a
       * vec4 constant slot (16 bytes each) — TODO confirm. */
      indirect_index = lp_build_emit_fetch(bld_base, emit_data->inst, 1, 0);
      indirect_index = lp_build_shr_imm(uint_bld, indirect_index, 4);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);

      /* Gather values from the constant buffer */
      unsigned chan_index;
      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         /* Construct a boolean vector telling us which channels
          * overflow the bound constant buffer */
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          indirect_index, num_consts);

         /* index_vec = indirect_index * 4 */
         LLVMValueRef index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
         index_vec = lp_build_add(uint_bld, index_vec,
                                  lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         /* build_gather zeroes/ignores lanes flagged by overflow_mask. */
         emit_data->output[chan_index] = build_gather(bld_base, consts_ptr, index_vec, overflow_mask, NULL);
      }
   } else if (0) {
      /* for indirect support with ARB_gpu_shader5 */
   } else {
      /* SSBO or workgroup-shared memory path. */
      LLVMValueRef index;
      LLVMValueRef scalar, scalar_ptr;
      unsigned chan_index;

      /* Byte offset >> 2 = 32-bit element index. */
      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit = NULL;

      if (!is_shared) {
         /* SSBO size in bytes >> 2 = dword count, splatted for the
          * per-lane bounds check below. */
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         /* Lanes must be both executing and in bounds to load. */
         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         /* Loop over the SIMD lanes: each active lane loads one scalar
          * dword; inactive/out-of-bounds lanes get 0. */
         LLVMValueRef result = lp_build_alloca(gallivm, uint_bld->vec_type, "");
         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         struct lp_build_if_state ifthen;
         LLVMValueRef cond, temp_res;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");

         lp_build_if(&ifthen, gallivm, cond);
         scalar = lp_build_pointer_get2(builder, uint_bld->elem_type, scalar_ptr, loop_index);

         temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_else(&ifthen);
         /* Inactive lane: insert a zero instead of loading. */
         temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, result, "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
         LLVMBuildStore(builder, temp_res, result);
         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
         emit_data->output[chan_index] = LLVMBuildLoad2(gallivm->builder, uint_bld->vec_type,
                                                        result, "");
      }
   }
}
3577
3578 static void
img_store_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3579 img_store_emit(
3580 const struct lp_build_tgsi_action * action,
3581 struct lp_build_tgsi_context * bld_base,
3582 struct lp_build_emit_data * emit_data)
3583 {
3584 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3585 struct lp_img_params params = { 0 };
3586 LLVMValueRef coords[5];
3587 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3588 unsigned dims;
3589 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3590 unsigned layer_coord;
3591
3592 target_to_dims_layer(target, &dims, &layer_coord);
3593 for (unsigned i = 0; i < dims; i++) {
3594 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, i);
3595 }
3596 for (unsigned i = dims; i < 5; i++) {
3597 coords[i] = coord_undef;
3598 }
3599 if (layer_coord)
3600 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, layer_coord);
3601
3602 params.type = bld->bld_base.base.type;
3603 params.resources_type = bld->resources_type;
3604 params.resources_ptr = bld->resources_ptr;
3605 params.thread_data_type = bld->thread_data_type;
3606 params.thread_data_ptr = bld->thread_data_ptr;
3607 params.coords = coords;
3608 params.outdata = NULL;
3609 params.exec_mask = mask_vec(bld_base);
3610 params.target = tgsi_to_pipe_tex_target(target);
3611 params.image_index = emit_data->inst->Dst[0].Register.Index;
3612 params.img_op = LP_IMG_STORE;
3613 for (unsigned i = 0; i < 4; i++)
3614 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3615
3616 bld->image->emit_op(bld->image,
3617 bld->bld_base.base.gallivm,
3618 ¶ms);
3619 }
3620
/**
 * Emit a TGSI STORE instruction.
 *
 * Dst[0] selects the destination file: images are forwarded to
 * img_store_emit(), TGSI_FILE_MEMORY writes go through the workgroup
 * shared-memory pointer, and anything else is treated as an SSBO write.
 */
static void
store_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_dst_register *bufreg = &emit_data->inst->Dst[0];
   unsigned buf = bufreg->Register.Index;
   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_store_emit(action, bld_base, emit_data);
   } else if (0) {

   } else {
      LLVMValueRef index; /* index into the const buffer */
      LLVMValueRef scalar_ptr;
      LLVMValueRef value;
      unsigned chan_index;

      /* Src[0] holds the byte offset; shift to get a dword index. */
      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 0, 0);
      index = lp_build_shr_imm(uint_bld, index, 2);

      scalar_ptr = is_shared ? bld->shared_ptr : bld->ssbos[buf];

      LLVMValueRef ssbo_limit = NULL;

      if (!is_shared) {
         /* SSBO size in bytes -> size in dwords, broadcast for the
          * per-lane bounds check below. */
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      TGSI_FOR_EACH_DST0_ENABLED_CHANNEL(emit_data->inst, chan_index) {
         /* Each enabled channel writes at offset + chan_index dwords. */
         LLVMValueRef loop_index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, chan_index));

         value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, chan_index);

         /* Disable lanes that are inactive or would write out of bounds. */
         LLVMValueRef exec_mask = mask_vec(bld_base);
         if (!is_shared) {
            LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, loop_index, ssbo_limit);
            exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
         }

         /* Scalarize: loop over the SIMD lanes and store each active
          * lane's value individually under an if-guard. */
         struct lp_build_loop_state loop_state;
         lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
                                                          loop_state.counter, "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

         struct lp_build_if_state ifthen;
         LLVMValueRef cond;

         loop_index = LLVMBuildExtractElement(gallivm->builder, loop_index,
                                              loop_state.counter, "");

         cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
         cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
         lp_build_if(&ifthen, gallivm, cond);

         lp_build_pointer_set(builder, scalar_ptr, loop_index, value_ptr);

         lp_build_endif(&ifthen);
         lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                                NULL, LLVMIntUGE);
      }
   }
}
3694
3695 static void
resq_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3696 resq_emit(
3697 const struct lp_build_tgsi_action * action,
3698 struct lp_build_tgsi_context * bld_base,
3699 struct lp_build_emit_data * emit_data)
3700 {
3701 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3702 struct lp_build_context *uint_bld = &bld_base->uint_bld;
3703 const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];
3704
3705 unsigned buf = bufreg->Register.Index;
3706 assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE);
3707
3708 if (bufreg->Register.File == TGSI_FILE_IMAGE) {
3709 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3710 struct lp_sampler_size_query_params params = { 0 };
3711 params.int_type = bld->bld_base.int_bld.type;
3712 params.texture_unit = buf;
3713 params.target = tgsi_to_pipe_tex_target(target);
3714 params.resources_type = bld->resources_type;
3715 params.resources_ptr = bld->resources_ptr;
3716 params.sizes_out = emit_data->output;
3717
3718 bld->image->emit_size_query(bld->image,
3719 bld->bld_base.base.gallivm,
3720 ¶ms);
3721 } else {
3722 LLVMValueRef num_ssbo = bld->ssbo_sizes[buf];
3723
3724 emit_data->output[emit_data->chan] = lp_build_broadcast_scalar(uint_bld, num_ssbo);
3725 }
3726 }
3727
3728 static void
img_atomic_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data,LLVMAtomicRMWBinOp op)3729 img_atomic_emit(
3730 const struct lp_build_tgsi_action * action,
3731 struct lp_build_tgsi_context * bld_base,
3732 struct lp_build_emit_data * emit_data,
3733 LLVMAtomicRMWBinOp op)
3734 {
3735 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3736 struct lp_img_params params = { 0 };
3737 LLVMValueRef coords[5];
3738 LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
3739 unsigned dims;
3740 unsigned layer_coord;
3741 enum tgsi_texture_type target = emit_data->inst->Memory.Texture;
3742
3743 target_to_dims_layer(target, &dims, &layer_coord);
3744
3745 for (unsigned i = 0; i < dims; i++) {
3746 coords[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, i);
3747 }
3748 for (unsigned i = dims; i < 5; i++) {
3749 coords[i] = coord_undef;
3750 }
3751 if (layer_coord)
3752 coords[2] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, layer_coord);
3753
3754 params.type = bld->bld_base.base.type;
3755 params.resources_type = bld->resources_type;
3756 params.resources_ptr = bld->resources_ptr;
3757 params.thread_data_ptr = bld->thread_data_ptr;
3758 params.exec_mask = mask_vec(bld_base);
3759 params.image_index = emit_data->inst->Src[0].Register.Index;
3760 params.coords = coords;
3761 params.target = tgsi_to_pipe_tex_target(target);
3762 params.op = op;
3763 params.outdata = emit_data->output;
3764 params.img_op = (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) ? LP_IMG_ATOMIC_CAS : LP_IMG_ATOMIC;
3765
3766 for (unsigned i = 0; i < 4; i++)
3767 params.indata[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, i);
3768 if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3769 for (unsigned i = 0; i < 4; i++)
3770 params.indata2[i] = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, i);
3771 }
3772 bld->image->emit_op(bld->image,
3773 bld->bld_base.base.gallivm,
3774 ¶ms);
3775 }
3776
/**
 * Emit a TGSI atomic instruction (ATOMUADD, ATOMXCHG, ATOMCAS, ...).
 *
 * Src[0] selects the resource: images are forwarded to img_atomic_emit(),
 * TGSI_FILE_MEMORY operates on workgroup shared memory, anything else on
 * an SSBO.  The buffer/shared path scalarizes: it loops over the SIMD
 * lanes and issues one LLVM atomic per active lane, gathering the old
 * values into the result vector.
 */
static void
atomic_emit(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const struct tgsi_full_src_register *bufreg = &emit_data->inst->Src[0];

   assert(bufreg->Register.File == TGSI_FILE_BUFFER || bufreg->Register.File == TGSI_FILE_IMAGE || bufreg->Register.File == TGSI_FILE_MEMORY);
   unsigned buf = bufreg->Register.Index;
   bool is_shared = bufreg->Register.File == TGSI_FILE_MEMORY;

   /* Map the TGSI opcode to an LLVM atomicrmw binop.  ATOMCAS has no
    * binop (it maps to cmpxchg) and leaves op unused. */
   LLVMAtomicRMWBinOp op = -1;
   switch (emit_data->inst->Instruction.Opcode) {
   case TGSI_OPCODE_ATOMUADD:
      op = LLVMAtomicRMWBinOpAdd;
      break;
   case TGSI_OPCODE_ATOMXCHG:
      op = LLVMAtomicRMWBinOpXchg;
      break;
   case TGSI_OPCODE_ATOMAND:
      op = LLVMAtomicRMWBinOpAnd;
      break;
   case TGSI_OPCODE_ATOMOR:
      op = LLVMAtomicRMWBinOpOr;
      break;
   case TGSI_OPCODE_ATOMXOR:
      op = LLVMAtomicRMWBinOpXor;
      break;
   case TGSI_OPCODE_ATOMUMIN:
      op = LLVMAtomicRMWBinOpUMin;
      break;
   case TGSI_OPCODE_ATOMUMAX:
      op = LLVMAtomicRMWBinOpUMax;
      break;
   case TGSI_OPCODE_ATOMIMIN:
      op = LLVMAtomicRMWBinOpMin;
      break;
   case TGSI_OPCODE_ATOMIMAX:
      op = LLVMAtomicRMWBinOpMax;
      break;
   case TGSI_OPCODE_ATOMCAS:
      break;
   default:
      assert(0);
      return;
   }

   if (bufreg->Register.File == TGSI_FILE_IMAGE) {
      img_atomic_emit(action, bld_base, emit_data, op);
   } else if (0) {
   } else {
      LLVMValueRef index; /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;
      LLVMValueRef value;

      /* Src[1] is the byte offset, Src[2] the operand value. */
      index = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 1, 0);
      value = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 2, 0);

      /* Byte offset -> dword index. */
      index = lp_build_shr_imm(uint_bld, index, 2);

      if (!is_shared) {
         /* The queried channel offsets the dword index. */
         index = lp_build_add(uint_bld, index, lp_build_const_int_vec(gallivm, uint_bld->type, emit_data->chan));
         scalar_ptr = bld->ssbos[buf];
      } else
         scalar_ptr = bld->shared_ptr;

      /* Per-lane old values are accumulated into this alloca. */
      LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                              uint_bld->vec_type, "");

      /* SSBO size in bytes -> dwords, broadcast for the bounds check. */
      LLVMValueRef ssbo_limit = NULL;
      if (!is_shared) {
         ssbo_limit = LLVMBuildAShr(gallivm->builder, bld->ssbo_sizes[buf], lp_build_const_int32(gallivm, 2), "");
         ssbo_limit = lp_build_broadcast_scalar(uint_bld, ssbo_limit);
      }

      /* Disable lanes that are inactive or out of bounds. */
      LLVMValueRef exec_mask = mask_vec(bld_base);

      if (!is_shared) {
         LLVMValueRef ssbo_oob_cmp = lp_build_cmp(uint_bld, PIPE_FUNC_LESS, index, ssbo_limit);
         exec_mask = LLVMBuildAnd(builder, exec_mask, ssbo_oob_cmp, "");
      }

      /* Scalarize: one LLVM atomic per SIMD lane. */
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, value,
                                                       loop_state.counter, "");
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, uint_bld->elem_type, "");

      index = LLVMBuildExtractElement(gallivm->builder, index,
                                      loop_state.counter, "");

      scalar_ptr = LLVMBuildGEP2(builder, uint_bld->elem_type, scalar_ptr,
                                 &index, 1, "");

      struct lp_build_if_state ifthen;
      LLVMValueRef cond, temp_res;

      /* Only perform the atomic on lanes whose exec mask is set. */
      cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
      cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
      lp_build_if(&ifthen, gallivm, cond);

      if (emit_data->inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
         /* cmpxchg: Src[2] is the comparator, Src[3] the new value. */
         LLVMValueRef cas_src = lp_build_emit_fetch(&bld->bld_base, emit_data->inst, 3, 0);
         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, cas_src,
                                                            loop_state.counter, "");
         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, uint_bld->elem_type, "");
         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                         cas_src_ptr,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         false);
         /* cmpxchg returns {old value, success}; keep the old value. */
         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
      } else {
         scalar = LLVMBuildAtomicRMW(builder, op,
                                     scalar_ptr, value_ptr,
                                     LLVMAtomicOrderingSequentiallyConsistent,
                                     false);
      }
      temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_else(&ifthen);
      /* Inactive lanes read back 0. */
      temp_res = LLVMBuildLoad2(builder, uint_bld->vec_type, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, lp_build_const_int32(gallivm, 0), loop_state.counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_endif(&ifthen);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                             NULL, LLVMIntUGE);
      emit_data->output[emit_data->chan] = LLVMBuildLoad2(gallivm->builder, uint_bld->vec_type, atom_res, "");
   }
}
3915
3916 static void
barrier_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3917 barrier_emit(
3918 const struct lp_build_tgsi_action * action,
3919 struct lp_build_tgsi_context * bld_base,
3920 struct lp_build_emit_data * emit_data)
3921 {
3922 struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
3923 struct gallivm_state * gallivm = bld_base->base.gallivm;
3924
3925 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
3926
3927 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
3928 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
3929 }
3930
3931 static void
membar_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)3932 membar_emit(
3933 const struct lp_build_tgsi_action * action,
3934 struct lp_build_tgsi_context * bld_base,
3935 struct lp_build_emit_data * emit_data)
3936 {
3937 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3938 LLVMBuildFence(builder, LLVMAtomicOrderingSequentiallyConsistent, false, "");
3939 }
3940
3941 static void
increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3942 increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base,
3943 LLVMValueRef ptr,
3944 LLVMValueRef mask)
3945 {
3946 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3947 LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
3948
3949 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
3950
3951 LLVMBuildStore(builder, current_vec, ptr);
3952 }
3953
3954 static void
clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)3955 clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base,
3956 LLVMValueRef ptr,
3957 LLVMValueRef mask)
3958 {
3959 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
3960 LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
3961
3962 current_vec = lp_build_select(&bld_base->uint_bld,
3963 mask,
3964 bld_base->uint_bld.zero,
3965 current_vec);
3966
3967 LLVMBuildStore(builder, current_vec, ptr);
3968 }
3969
3970 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)3971 clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld,
3972 LLVMValueRef current_mask_vec,
3973 LLVMValueRef total_emitted_vertices_vec)
3974 {
3975 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
3976 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
3977 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
3978 total_emitted_vertices_vec,
3979 bld->max_output_vertices_vec);
3980
3981 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
3982 }
3983
/**
 * Emit a TGSI EMIT (geometry shader vertex emit).
 *
 * Clamps the execution mask so lanes that reached the declared maximum
 * vertex count emit nothing, gathers the current output registers, hands
 * them to the GS interface, and bumps both vertex counters by the mask.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      /* Src[0].x is the stream id for the emit. */
      LLVMValueRef stream_id = emit_fetch_immediate(bld_base, &emit_data->inst->Src[0],
                                                    TGSI_TYPE_UNSIGNED,
                                                    emit_data->inst->Src[0].Register.SwizzleX);
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr, "");

      /* Drop lanes that already hit the max output vertex count. */
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      /* Flush the output registers before handing them to the GS. */
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                                 bld->outputs,
                                 total_emitted_vertices_vec,
                                 mask,
                                 stream_id);
      /* Count the emit for both the current-primitive and total counters. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
4023
4024
/**
 * Flush any unemitted vertices into a primitive, for the lanes in mask.
 *
 * Combines the caller's mask with a "has unemitted vertices" mask so the
 * counter updates only affect lanes with pending vertices, then notifies
 * the GS interface and resets the per-primitive vertex counter.
 */
static void
end_primitive_masked(struct lp_build_tgsi_context * bld_base,
                     LLVMValueRef mask)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->end_primitive) {
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef emitted_vertices_vec =
         LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
                        bld->emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_prims_vec =
         LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
                        bld->emitted_prims_vec_ptr, "");
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
                        bld->total_emitted_vertices_vec_ptr, "");
      LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
                                               emitted_vertices_vec,
                                               uint_bld->zero);
      /* We need to combine the current execution mask with the mask
         telling us which, if any, execution slots actually have
         unemitted primitives, this way we make sure that end_primitives
         executes only on the paths that have unflushed vertices */
      mask = LLVMBuildAnd(builder, mask, emitted_mask, "");

      /* NOTE(review): the combined 'mask' above is used only for the
       * counter updates below; the raw mask_vec() is what is passed to
       * end_primitive here — confirm this asymmetry is intentional. */
      bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
                                   total_emitted_vertices_vec,
                                   emitted_vertices_vec,
                                   emitted_prims_vec,
                                   mask_vec(bld_base), 0);

#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts1 = ",
                           emitted_vertices_vec);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted prims1 = ",
                           LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
                                          bld->emitted_prims_vec_ptr, ""));
#endif
      /* One more primitive for the flushing lanes; their per-primitive
       * vertex counter restarts at zero. */
      increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr,
                                mask);
      clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                   mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ end prim emitted verts2 = ",
                           LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
                                          bld->emitted_vertices_vec_ptr, ""));
#endif
   }

}
4083
4084 static void
end_primitive(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4085 end_primitive(
4086 const struct lp_build_tgsi_action * action,
4087 struct lp_build_tgsi_context * bld_base,
4088 struct lp_build_emit_data * emit_data)
4089 {
4090 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4091
4092 if (bld->gs_iface->end_primitive) {
4093 LLVMValueRef mask = mask_vec(bld_base);
4094 end_primitive_masked(bld_base, mask);
4095 }
4096 }
4097
4098 static void
barrier_emit_tcs(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4099 barrier_emit_tcs(
4100 const struct lp_build_tgsi_action * action,
4101 struct lp_build_tgsi_context * bld_base,
4102 struct lp_build_emit_data * emit_data)
4103 {
4104 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4105
4106 if (bld->tcs_iface->emit_barrier) {
4107 bld->tcs_iface->emit_barrier((struct lp_build_context*)bld_base);
4108 }
4109 }
4110
4111
4112 static void
cal_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4113 cal_emit(
4114 const struct lp_build_tgsi_action * action,
4115 struct lp_build_tgsi_context * bld_base,
4116 struct lp_build_emit_data * emit_data)
4117 {
4118 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4119
4120 lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label,
4121 &bld_base->pc);
4122 }
4123
4124 static void
ret_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4125 ret_emit(
4126 const struct lp_build_tgsi_action * action,
4127 struct lp_build_tgsi_context * bld_base,
4128 struct lp_build_emit_data * emit_data)
4129 {
4130 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4131
4132 lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc);
4133 }
4134
4135 static void
brk_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4136 brk_emit(
4137 const struct lp_build_tgsi_action * action,
4138 struct lp_build_tgsi_context * bld_base,
4139 struct lp_build_emit_data * emit_data)
4140 {
4141 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4142
4143 lp_exec_tgsi_break(&bld->exec_mask, bld_base);
4144 }
4145
4146 static void
if_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4147 if_emit(
4148 const struct lp_build_tgsi_action * action,
4149 struct lp_build_tgsi_context * bld_base,
4150 struct lp_build_emit_data * emit_data)
4151 {
4152 LLVMValueRef tmp;
4153 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4154
4155 tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL,
4156 emit_data->args[0], bld->bld_base.base.zero);
4157 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4158 }
4159
4160 static void
uif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4161 uif_emit(
4162 const struct lp_build_tgsi_action * action,
4163 struct lp_build_tgsi_context * bld_base,
4164 struct lp_build_emit_data * emit_data)
4165 {
4166 LLVMValueRef tmp;
4167 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4168 struct lp_build_context *uint_bld = &bld_base->uint_bld;
4169
4170 tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL,
4171 emit_data->args[0], uint_bld->zero);
4172 lp_exec_mask_cond_push(&bld->exec_mask, tmp);
4173 }
4174
4175 static void
case_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4176 case_emit(
4177 const struct lp_build_tgsi_action * action,
4178 struct lp_build_tgsi_context * bld_base,
4179 struct lp_build_emit_data * emit_data)
4180 {
4181 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4182
4183 lp_exec_case(&bld->exec_mask, emit_data->args[0]);
4184 }
4185
4186 static void
default_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4187 default_emit(
4188 const struct lp_build_tgsi_action * action,
4189 struct lp_build_tgsi_context * bld_base,
4190 struct lp_build_emit_data * emit_data)
4191 {
4192 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4193
4194 lp_exec_default(&bld->exec_mask, bld_base);
4195 }
4196
4197 static void
switch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4198 switch_emit(
4199 const struct lp_build_tgsi_action * action,
4200 struct lp_build_tgsi_context * bld_base,
4201 struct lp_build_emit_data * emit_data)
4202 {
4203 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4204
4205 lp_exec_switch(&bld->exec_mask, emit_data->args[0]);
4206 }
4207
4208 static void
endswitch_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4209 endswitch_emit(
4210 const struct lp_build_tgsi_action * action,
4211 struct lp_build_tgsi_context * bld_base,
4212 struct lp_build_emit_data * emit_data)
4213 {
4214 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4215
4216 lp_exec_endswitch(&bld->exec_mask, bld_base);
4217 }
4218
4219 static void
bgnloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4220 bgnloop_emit(
4221 const struct lp_build_tgsi_action * action,
4222 struct lp_build_tgsi_context * bld_base,
4223 struct lp_build_emit_data * emit_data)
4224 {
4225 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4226
4227 lp_exec_bgnloop(&bld->exec_mask, true);
4228 }
4229
4230 static void
bgnsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4231 bgnsub_emit(
4232 const struct lp_build_tgsi_action * action,
4233 struct lp_build_tgsi_context * bld_base,
4234 struct lp_build_emit_data * emit_data)
4235 {
4236 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4237
4238 lp_exec_mask_bgnsub(&bld->exec_mask);
4239 }
4240
4241 static void
else_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4242 else_emit(
4243 const struct lp_build_tgsi_action * action,
4244 struct lp_build_tgsi_context * bld_base,
4245 struct lp_build_emit_data * emit_data)
4246 {
4247 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4248
4249 lp_exec_mask_cond_invert(&bld->exec_mask);
4250 }
4251
4252 static void
endif_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4253 endif_emit(
4254 const struct lp_build_tgsi_action * action,
4255 struct lp_build_tgsi_context * bld_base,
4256 struct lp_build_emit_data * emit_data)
4257 {
4258 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4259
4260 lp_exec_mask_cond_pop(&bld->exec_mask);
4261 }
4262
4263 static void
endloop_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4264 endloop_emit(
4265 const struct lp_build_tgsi_action * action,
4266 struct lp_build_tgsi_context * bld_base,
4267 struct lp_build_emit_data * emit_data)
4268 {
4269 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4270
4271 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
4272 }
4273
4274 static void
endsub_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4275 endsub_emit(
4276 const struct lp_build_tgsi_action * action,
4277 struct lp_build_tgsi_context * bld_base,
4278 struct lp_build_emit_data * emit_data)
4279 {
4280 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4281
4282 lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc);
4283 }
4284
4285 static void
cont_emit(const struct lp_build_tgsi_action * action,struct lp_build_tgsi_context * bld_base,struct lp_build_emit_data * emit_data)4286 cont_emit(
4287 const struct lp_build_tgsi_action * action,
4288 struct lp_build_tgsi_context * bld_base,
4289 struct lp_build_emit_data * emit_data)
4290 {
4291 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4292
4293 lp_exec_continue(&bld->exec_mask);
4294 }
4295
emit_prologue(struct lp_build_tgsi_context * bld_base)4296 static void emit_prologue(struct lp_build_tgsi_context * bld_base)
4297 {
4298 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4299 struct gallivm_state * gallivm = bld_base->base.gallivm;
4300
4301 if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
4302 unsigned array_size = bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4;
4303 bld->temps_array_type = LLVMArrayType(bld_base->base.vec_type, array_size);
4304 bld->temps_array = lp_build_alloca_undef(gallivm,
4305 LLVMArrayType(bld_base->base.vec_type, array_size),
4306 "temp_array");
4307 }
4308
4309 if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
4310 LLVMValueRef array_size =
4311 lp_build_const_int32(gallivm,
4312 bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
4313 bld->outputs_array_type = bld_base->base.vec_type;
4314 bld->outputs_array = lp_build_array_alloca(gallivm,
4315 bld_base->base.vec_type, array_size,
4316 "output_array");
4317 }
4318
4319 if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
4320 unsigned array_size = bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4;
4321 bld->imms_array = lp_build_alloca_undef(gallivm,
4322 LLVMArrayType(bld_base->base.vec_type, array_size),
4323 "imms_array");
4324 }
4325
4326 /* If we have indirect addressing in inputs we need to copy them into
4327 * our alloca array to be able to iterate over them */
4328 if (bld->indirect_files & (1 << TGSI_FILE_INPUT) &&
4329 !bld->gs_iface && !bld->tes_iface && !bld->tcs_iface) {
4330 unsigned index, chan;
4331 LLVMTypeRef vec_type = bld_base->base.vec_type;
4332 LLVMValueRef array_size = lp_build_const_int32(gallivm,
4333 bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
4334 bld->inputs_array = lp_build_array_alloca(gallivm,
4335 vec_type, array_size,
4336 "input_array");
4337
4338 assert(bld_base->info->num_inputs
4339 <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);
4340
4341 for (index = 0; index < bld_base->info->num_inputs; ++index) {
4342 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
4343 LLVMValueRef lindex =
4344 lp_build_const_int32(gallivm, index * 4 + chan);
4345 LLVMValueRef input_ptr =
4346 LLVMBuildGEP2(gallivm->builder,
4347 bld->bld_base.base.vec_type,
4348 bld->inputs_array,
4349 &lindex, 1, "");
4350 LLVMValueRef value = bld->inputs[index][chan];
4351 if (value)
4352 LLVMBuildStore(gallivm->builder, value, input_ptr);
4353 }
4354 }
4355 }
4356
4357 if (bld->gs_iface) {
4358 struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
4359 bld->emitted_prims_vec_ptr =
4360 lp_build_alloca(gallivm,
4361 uint_bld->vec_type,
4362 "emitted_prims_ptr");
4363 bld->emitted_vertices_vec_ptr =
4364 lp_build_alloca(gallivm,
4365 uint_bld->vec_type,
4366 "emitted_vertices_ptr");
4367 bld->total_emitted_vertices_vec_ptr =
4368 lp_build_alloca(gallivm,
4369 uint_bld->vec_type,
4370 "total_emitted_vertices_ptr");
4371
4372 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4373 bld->emitted_prims_vec_ptr);
4374 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4375 bld->emitted_vertices_vec_ptr);
4376 LLVMBuildStore(gallivm->builder, uint_bld->zero,
4377 bld->total_emitted_vertices_vec_ptr);
4378 }
4379
4380 if (DEBUG_EXECUTION) {
4381 lp_build_printf(gallivm, "\n");
4382 emit_dump_file(bld, TGSI_FILE_CONSTANT);
4383 if (!bld->gs_iface)
4384 emit_dump_file(bld, TGSI_FILE_INPUT);
4385 }
4386 }
4387
emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)4388 static void emit_prologue_post_decl(struct lp_build_tgsi_context * bld_base)
4389 {
4390 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4391
4392 if (bld->tcs_iface && bld->tcs_iface->emit_prologue) {
4393 bld->tcs_iface->emit_prologue((struct lp_build_context*)bld_base);
4394 }
4395 }
4396
emit_epilogue(struct lp_build_tgsi_context * bld_base)4397 static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
4398 {
4399 struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
4400 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
4401
4402 if (DEBUG_EXECUTION) {
4403 /* for debugging */
4404 if (0) {
4405 emit_dump_file(bld, TGSI_FILE_TEMPORARY);
4406 }
4407 emit_dump_file(bld, TGSI_FILE_OUTPUT);
4408 lp_build_printf(bld_base->base.gallivm, "\n");
4409 }
4410
4411 if (bld->tcs_iface && bld->tcs_iface->emit_epilogue) {
4412 bld->tcs_iface->emit_epilogue((struct lp_build_context*)bld_base);
4413 }
4414
4415 /* If we have indirect addressing in outputs we need to copy our alloca array
4416 * to the outputs slots specified by the caller */
4417 if (bld->gs_iface) {
4418 LLVMValueRef total_emitted_vertices_vec;
4419 LLVMValueRef emitted_prims_vec;
4420 /* implicit end_primitives, needed in case there are any unflushed
4421 vertices in the cache. Note must not call end_primitive here
4422 since the exec_mask is not valid at this point. */
4423 end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));
4424
4425 total_emitted_vertices_vec =
4426 LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type,
4427 bld->total_emitted_vertices_vec_ptr, "");
4428 emitted_prims_vec =
4429 LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type,
4430 bld->emitted_prims_vec_ptr, "");
4431
4432 bld->gs_iface->gs_epilogue(bld->gs_iface,
4433 total_emitted_vertices_vec,
4434 emitted_prims_vec, 0);
4435 } else {
4436 gather_outputs(bld);
4437 }
4438 }
4439
/**
 * Translate a TGSI shader into LLVM IR, SoA style (one LLVM vector per
 * register channel).
 *
 * \param gallivm  gallivm/LLVM context the IR is emitted into
 * \param tokens   the TGSI token stream to translate
 * \param params   caller-provided environment: types, pointers to
 *                 constants/resources/samplers, and the optional
 *                 GS/TCS/TES interfaces
 * \param outputs  where the shader's output values are returned,
 *                 indexed [output][channel]
 */
void
lp_build_tgsi_soa(struct gallivm_state *gallivm,
                  const struct tgsi_token *tokens,
                  const struct lp_build_tgsi_params *params,
                  LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS])
{
   struct lp_build_tgsi_soa_context bld;
   struct lp_type type = params->type;
   /* signed variant of the computation type, same width/length */
   struct lp_type res_type;

   assert(type.length <= LP_MAX_VECTOR_LENGTH);
   memset(&res_type, 0, sizeof res_type);
   res_type.width = type.width;
   res_type.length = type.length;
   res_type.sign = 1;

   /* Setup build context: base float type plus uint/int/elem variants,
    * and double-width contexts for 64-bit opcodes. */
   memset(&bld, 0, sizeof bld);
   lp_build_context_init(&bld.bld_base.base, gallivm, type);
   lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
   lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
   lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
   {
      struct lp_type dbl_type;
      dbl_type = type;
      dbl_type.width *= 2;
      lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
   }
   {
      struct lp_type uint64_type;
      uint64_type = lp_uint_type(type);
      uint64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
   }
   {
      struct lp_type int64_type;
      int64_type = lp_int_type(type);
      int64_type.width *= 2;
      lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
   }
   /* Copy the caller-supplied environment into the build context. */
   bld.mask = params->mask;
   bld.inputs = params->inputs;
   bld.outputs = outputs;
   bld.consts_ptr = params->consts_ptr;
   bld.ssbo_ptr = params->ssbo_ptr;
   bld.sampler = params->sampler;
   bld.bld_base.info = params->info;
   bld.indirect_files = params->info->indirect_files;
   bld.context_type = params->context_type;
   bld.context_ptr = params->context_ptr;
   bld.resources_type = params->resources_type;
   bld.resources_ptr = params->resources_ptr;
   bld.thread_data_type = params->thread_data_type;
   bld.thread_data_ptr = params->thread_data_ptr;
   bld.image = params->image;
   bld.shared_ptr = params->shared_ptr;
   bld.coro = params->coro;

   /*
    * If the number of temporaries is rather large then we just
    * allocate them as an array right from the start and treat
    * like indirect temporaries.
    */
   if (params->info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) {
      bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY);
   }
   /*
    * For performance reason immediates are always backed in a static
    * array, but if their number is too great, we have to use just
    * a dynamically allocated array.
    */
   bld.use_immediates_array =
         (params->info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES);
   if (bld.use_immediates_array) {
      bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE);
   }


   /* Register/file fetch and store callbacks. */
   bld.bld_base.soa = true;
   bld.bld_base.emit_debug = emit_debug;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary;
   bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value;

   bld.bld_base.emit_store = emit_store;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_output;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_TEMPORARY] = emit_store_temp;
   bld.bld_base.emit_store_reg_funcs[TGSI_FILE_ADDRESS] = emit_store_address;

   bld.bld_base.emit_declaration = lp_emit_declaration_soa;
   bld.bld_base.emit_immediate = lp_emit_immediate_soa;

   bld.bld_base.emit_prologue = emit_prologue;
   bld.bld_base.emit_prologue_post_decl = emit_prologue_post_decl;
   bld.bld_base.emit_epilogue = emit_epilogue;

   /* Set opcode actions: start from the generic CPU defaults, then
    * override control flow, texturing, memory and atomic opcodes with
    * the SoA-specific handlers. */
   lp_set_default_actions_cpu(&bld.bld_base);

   bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX_LZ].emit = txl_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXF_LZ].emit = txf_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LODQ].emit = lodq_emit;
   /* DX10 sampling ops */
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I_MS].emit = sample_i_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_GATHER4].emit = gather4_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_LOD].emit = lod_emit;

   /* SSBO/image memory opcodes */
   bld.bld_base.op_actions[TGSI_OPCODE_LOAD].emit = load_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_STORE].emit = store_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_RESQ].emit = resq_emit;

   /* All atomics share one handler; it dispatches on the opcode. */
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUADD].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXCHG].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMCAS].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMAND].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMXOR].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMUMAX].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMIN].emit = atomic_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_ATOMIMAX].emit = atomic_emit;

   bld.bld_base.op_actions[TGSI_OPCODE_MEMBAR].emit = membar_emit;
   bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit;

   if (params->gs_iface) {
      /* There's no specific value for this because it should always
       * be set, but apps using ext_geometry_shader4 quite often
       * were forgetting so we're using MAX_VERTEX_VARYING from
       * that spec even though we could assert if it's not
       * set, but that's a lot uglier. */
      unsigned max_output_vertices;

      /* inputs are always indirect with gs */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.gs_iface = params->gs_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex;
      bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive;

      max_output_vertices =
            params->info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES];
      if (!max_output_vertices)
         max_output_vertices = 32;

      bld.max_output_vertices_vec =
         lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
                                max_output_vertices);
   }

   if (params->tes_iface) {
      /* inputs are always indirect with tes */
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.tes_iface = params->tes_iface;
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tes_input;
   }

   if (params->tcs_iface) {
      bld.tcs_iface = params->tcs_iface;
      /* outputs and inputs are always indirect with tcs */
      bld.indirect_files |= (1 << TGSI_FILE_OUTPUT);
      bld.bld_base.emit_store_reg_funcs[TGSI_FILE_OUTPUT] = emit_store_tcs_output;
      bld.indirect_files |= (1 << TGSI_FILE_INPUT);
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_tcs_input;
      /* TCS reads its own (and other invocations') outputs through the
       * same fetch path as inputs. */
      bld.bld_base.emit_fetch_funcs[TGSI_FILE_OUTPUT] = emit_fetch_tcs_input;
      bld.bld_base.op_actions[TGSI_OPCODE_BARRIER].emit = barrier_emit_tcs;
   }

   lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);

   bld.system_values = *params->system_values;

   /* Run the actual translation over the token stream. */
   lp_build_tgsi_llvm(&bld.bld_base, tokens);

   /* Debug aid, normally disabled: dump the TGSI and the generated
    * LLVM function. */
   if (0) {
      LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder);
      LLVMValueRef function = LLVMGetBasicBlockParent(block);
      debug_printf("11111111111111111111111111111 \n");
      tgsi_dump(tokens, 0);
      lp_debug_dump_value(function);
      debug_printf("2222222222222222222222222222 \n");
   }

   /* Debug aid, normally disabled: dump the whole LLVM module. */
   if (0) {
      LLVMModuleRef module = LLVMGetGlobalParent(
         LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder)));
      LLVMDumpModule(module);

   }
   lp_exec_mask_fini(&bld.exec_mask);
}
4672