xref: /aosp_15_r20/external/mesa3d/src/gallium/auxiliary/gallivm/lp_bld_nir_soa.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2019 Red Hat.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included
14  * in all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22  * SOFTWARE.
23  *
24  **************************************************************************/
25 
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_jit_types.h"
34 #include "lp_bld_arit.h"
35 #include "lp_bld_bitarit.h"
36 #include "lp_bld_coro.h"
37 #include "lp_bld_printf.h"
38 #include "lp_bld_intr.h"
39 #include "util/u_cpu_detect.h"
40 #include "util/u_math.h"
41 
bit_size_to_shift_size(int bit_size)42 static int bit_size_to_shift_size(int bit_size)
43 {
44    switch (bit_size) {
45    case 64:
46       return 3;
47    default:
48    case 32:
49       return 2;
50    case 16:
51       return 1;
52    case 8:
53       return 0;
54    }
55 }
56 
57 /*
58  * combine the execution mask if there is one with the current mask.
59  */
60 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)61 mask_vec(struct lp_build_nir_context *bld_base)
62 {
63    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
64    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
65    struct lp_exec_mask *exec_mask = &bld->exec_mask;
66    LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
67    if (!exec_mask->has_mask) {
68       return bld_mask;
69    }
70    if (!bld_mask)
71       return exec_mask->exec_mask;
72    return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
73                        exec_mask->exec_mask, "");
74 }
75 
76 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)77 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
78 {
79    struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
80 
81    /* Fragment shaders may dispatch with invocation 0 inactive.  All other
82     * stages have invocation 0 active at the top.  (See
83     * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
84     */
85    if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
86       return false;
87 
88    /* If we're in some control flow right now, then invocation 0 may be
89     * disabled.
90     */
91    if (bld->exec_mask.has_mask)
92       return false;
93 
94    return true;
95 }
96 
97 /**
98  * Returns a scalar value of the first active invocation in the exec_mask.
99  *
100  * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else
101  * branches thare are all false, or portions of a loop after a break/continue
102  * has ended the last invocation that had been active in the loop).  In that
103  * case, we return a 0 value so that unconditional LLVMBuildExtractElement of
104  * the first_active_invocation (such as in memory loads, texture unit index
105  * lookups, etc) will use a valid index
106  */
first_active_invocation(struct lp_build_nir_context * bld_base)107 static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base)
108 {
109    struct gallivm_state *gallivm = bld_base->base.gallivm;
110    LLVMBuilderRef builder = gallivm->builder;
111    struct lp_build_context *uint_bld = &bld_base->uint_bld;
112 
113    if (invocation_0_must_be_active(bld_base))
114       return lp_build_const_int32(gallivm, 0);
115 
116    LLVMValueRef exec_mask = mask_vec(bld_base);
117 
118    LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "exec_bitvec");
119    /* Turn it from N x i1 to iN, then extend it up to i32 so we can use a single
120     * cttz intrinsic -- I assume the compiler will drop the extend if there are
121     * smaller instructions available, since we have is_zero_poison.
122     */
123    bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask");
124    bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
125 
126    LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
127 
128    LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
129                                                          LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false));
130 
131    return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0");
132 }
133 
134 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size,bool is_float)135 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float)
136 {
137    if (bit_size == 64)
138       return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
139    else if (bit_size == 16)
140       return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
141    else if (bit_size == 8)
142       return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
143    else
144       return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0);
145 }
146 
147 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)148 emit_fetch_64bit(
149    struct lp_build_nir_context * bld_base,
150    LLVMValueRef input,
151    LLVMValueRef input2)
152 {
153    struct gallivm_state *gallivm = bld_base->base.gallivm;
154    LLVMBuilderRef builder = gallivm->builder;
155    LLVMValueRef res;
156    int i;
157    LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
158    int len = bld_base->base.type.length * 2;
159    assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
160 
161    for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
162 #if UTIL_ARCH_LITTLE_ENDIAN
163       shuffles[i] = lp_build_const_int32(gallivm, i / 2);
164       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
165 #else
166       shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
167       shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
168 #endif
169    }
170    res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
171 
172    return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
173 }
174 
175 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])176 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
177                        LLVMValueRef value,
178                        LLVMValueRef split_values[2])
179 {
180    struct gallivm_state *gallivm = bld_base->base.gallivm;
181    LLVMBuilderRef builder = gallivm->builder;
182    unsigned i;
183    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
184    LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
185    int len = bld_base->base.type.length * 2;
186 
187    value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
188    for (i = 0; i < bld_base->base.type.length; i++) {
189 #if UTIL_ARCH_LITTLE_ENDIAN
190       shuffles[i] = lp_build_const_int32(gallivm, i * 2);
191       shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
192 #else
193       shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
194       shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
195 #endif
196    }
197 
198    split_values[0] = LLVMBuildShuffleVector(builder, value,
199                                  LLVMGetUndef(LLVMTypeOf(value)),
200                                  LLVMConstVector(shuffles,
201                                                  bld_base->base.type.length),
202                                  "");
203    split_values[1] = LLVMBuildShuffleVector(builder, value,
204                                   LLVMGetUndef(LLVMTypeOf(value)),
205                                   LLVMConstVector(shuffles2,
206                                                   bld_base->base.type.length),
207                                   "");
208 }
209 
210 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)211 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
212                       LLVMValueRef chan_ptr,
213                       LLVMValueRef chan_ptr2,
214                       LLVMValueRef value)
215 {
216    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
217    struct lp_build_context *float_bld = &bld_base->base;
218    LLVMValueRef split_vals[2];
219 
220    emit_store_64bit_split(bld_base, value, split_vals);
221 
222    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
223    lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
224 }
225 
226 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)227 get_soa_array_offsets(struct lp_build_context *uint_bld,
228                       LLVMValueRef indirect_index,
229                       int num_components,
230                       unsigned chan_index,
231                       bool need_perelement_offset)
232 {
233    struct gallivm_state *gallivm = uint_bld->gallivm;
234    LLVMValueRef chan_vec =
235       lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
236    LLVMValueRef length_vec =
237       lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
238    LLVMValueRef index_vec;
239 
240    /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
241    index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
242    index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
243    index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
244 
245    if (need_perelement_offset) {
246       LLVMValueRef pixel_offsets;
247       unsigned i;
248      /* build pixel offset vector: {0, 1, 2, 3, ...} */
249       pixel_offsets = uint_bld->undef;
250       for (i = 0; i < uint_bld->type.length; i++) {
251          LLVMValueRef ii = lp_build_const_int32(gallivm, i);
252          pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
253                                                 ii, ii, "");
254       }
255       index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
256    }
257    return index_vec;
258 }
259 
260 static LLVMValueRef
build_gather(struct lp_build_nir_context * bld_base,struct lp_build_context * bld,LLVMTypeRef base_type,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef overflow_mask,LLVMValueRef indexes2)261 build_gather(struct lp_build_nir_context *bld_base,
262              struct lp_build_context *bld,
263              LLVMTypeRef base_type,
264              LLVMValueRef base_ptr,
265              LLVMValueRef indexes,
266              LLVMValueRef overflow_mask,
267              LLVMValueRef indexes2)
268 {
269    struct gallivm_state *gallivm = bld_base->base.gallivm;
270    LLVMBuilderRef builder = gallivm->builder;
271    struct lp_build_context *uint_bld = &bld_base->uint_bld;
272    LLVMValueRef res;
273    unsigned i;
274 
275    if (indexes2)
276       res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
277    else
278       res = bld->undef;
279    /*
280     * overflow_mask is a vector telling us which channels
281     * in the vector overflowed. We use the overflow behavior for
282     * constant buffers which is defined as:
283     * Out of bounds access to constant buffer returns 0 in all
284     * components. Out of bounds behavior is always with respect
285     * to the size of the buffer bound at that slot.
286     */
287 
288    if (overflow_mask) {
289       /*
290        * We avoid per-element control flow here (also due to llvm going crazy,
291        * though I suspect it's better anyway since overflow is likely rare).
292        * Note that since we still fetch from buffers even if num_elements was
293        * zero (in this case we'll fetch from index zero) the jit func callers
294        * MUST provide valid fake constant buffers of size 4x32 (the values do
295        * not matter), otherwise we'd still need (not per element though)
296        * control flow.
297        */
298       indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
299       if (indexes2)
300          indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
301    }
302 
303    /*
304     * Loop over elements of index_vec, load scalar value, insert it into 'res'.
305     */
306    for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
307       LLVMValueRef si, di;
308       LLVMValueRef index;
309       LLVMValueRef scalar_ptr, scalar;
310 
311       di = lp_build_const_int32(gallivm, i);
312       if (indexes2)
313          si = lp_build_const_int32(gallivm, i >> 1);
314       else
315          si = di;
316 
317       if (indexes2 && (i & 1)) {
318          index = LLVMBuildExtractElement(builder,
319                                          indexes2, si, "");
320       } else {
321          index = LLVMBuildExtractElement(builder,
322                                          indexes, si, "");
323       }
324 
325       scalar_ptr = LLVMBuildGEP2(builder, base_type, base_ptr, &index, 1, "gather_ptr");
326       scalar = LLVMBuildLoad2(builder, base_type, scalar_ptr, "");
327 
328       res = LLVMBuildInsertElement(builder, res, scalar, di, "");
329    }
330 
331    if (overflow_mask) {
332       if (indexes2) {
333          res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
334          overflow_mask = LLVMBuildSExt(builder, overflow_mask,
335                                        bld_base->dbl_bld.int_vec_type, "");
336          res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
337                                bld_base->dbl_bld.zero, res);
338       } else
339          res = lp_build_select(bld, overflow_mask, bld->zero, res);
340    }
341 
342    return res;
343 }
344 
345 /**
346  * Scatter/store vector.
347  */
348 static void
emit_mask_scatter(struct lp_build_nir_soa_context * bld,LLVMValueRef base_ptr,LLVMValueRef indexes,LLVMValueRef values,struct lp_exec_mask * mask)349 emit_mask_scatter(struct lp_build_nir_soa_context *bld,
350                   LLVMValueRef base_ptr,
351                   LLVMValueRef indexes,
352                   LLVMValueRef values,
353                   struct lp_exec_mask *mask)
354 {
355    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
356    LLVMBuilderRef builder = gallivm->builder;
357    unsigned i;
358    LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;
359 
360    /*
361     * Loop over elements of index_vec, store scalar value.
362     */
363    for (i = 0; i < bld->bld_base.base.type.length; i++) {
364       LLVMValueRef ii = lp_build_const_int32(gallivm, i);
365       LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
366       LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
367       LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(val), base_ptr, &index, 1, "scatter_ptr");
368       LLVMValueRef scalar_pred = pred ?
369          LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;
370 
371       if (0)
372          lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
373                          ii, val, index, scalar_ptr);
374 
375       if (scalar_pred) {
376          LLVMValueRef real_val, dst_val;
377          dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
378          scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
379          real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
380          LLVMBuildStore(builder, real_val, scalar_ptr);
381       }
382       else {
383          LLVMBuildStore(builder, val, scalar_ptr);
384       }
385    }
386 }
387 
emit_load_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned vertex_index,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])388 static void emit_load_var(struct lp_build_nir_context *bld_base,
389                            nir_variable_mode deref_mode,
390                            unsigned num_components,
391                            unsigned bit_size,
392                            nir_variable *var,
393                            unsigned vertex_index,
394                            LLVMValueRef indir_vertex_index,
395                            unsigned const_index,
396                            LLVMValueRef indir_index,
397                            LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
398 {
399    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
400    struct gallivm_state *gallivm = bld_base->base.gallivm;
401    int dmul = bit_size == 64 ? 2 : 1;
402    unsigned location = var->data.driver_location;
403    unsigned location_frac = var->data.location_frac;
404 
405    if (!var->data.compact && !indir_index)
406       location += const_index;
407    else if (var->data.compact) {
408       location += const_index / 4;
409       location_frac += const_index % 4;
410       const_index = 0;
411    }
412    switch (deref_mode) {
413    case nir_var_shader_in:
414       for (unsigned i = 0; i < num_components; i++) {
415          int idx = (i * dmul) + location_frac;
416          int comp_loc = location;
417 
418          if (bit_size == 64 && idx >= 4) {
419             comp_loc++;
420             idx = idx % 4;
421          }
422 
423          if (bld->gs_iface) {
424             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
425             LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
426             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
427             LLVMValueRef result2;
428 
429             result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
430                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
431             if (bit_size == 64) {
432                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
433                result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
434                                                     false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
435                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
436             }
437          } else if (bld->tes_iface) {
438             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
439             LLVMValueRef attrib_index_val;
440             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
441             LLVMValueRef result2;
442 
443             if (indir_index) {
444                if (var->data.compact) {
445                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
446                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
447                } else
448                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
449             } else
450                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
451 
452             if (var->data.patch) {
453                result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
454                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
455                if (bit_size == 64) {
456                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
457                   result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
458                                                               indir_index ? true : false, attrib_index_val, swizzle_index_val);
459                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
460                }
461             }
462             else {
463                result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
464                                                               indir_vertex_index ? true : false,
465                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
466                                                               (indir_index && !var->data.compact) ? true : false, attrib_index_val,
467                                                               (indir_index && var->data.compact) ? true : false, swizzle_index_val);
468                if (bit_size == 64) {
469                   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
470                   result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
471                                                                indir_vertex_index ? true : false,
472                                                                indir_vertex_index ? indir_vertex_index : vertex_index_val,
473                                                                indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
474                   result[i] = emit_fetch_64bit(bld_base, result[i], result2);
475                }
476             }
477          } else if (bld->tcs_iface) {
478             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
479             LLVMValueRef attrib_index_val;
480             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
481 
482             if (indir_index) {
483                if (var->data.compact) {
484                   swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
485                   attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
486                } else
487                   attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
488             } else
489                attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
490             result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
491                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
492                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
493                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val);
494             if (bit_size == 64) {
495                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
496                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
497                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
498                                                                        indir_index ? true : false, attrib_index_val,
499                                                                        false, swizzle_index_val);
500                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
501             }
502          } else {
503             if (indir_index) {
504                LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
505                LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
506                                                               attrib_index_val, 4, idx,
507                                                               true);
508                LLVMValueRef index_vec2 = NULL;
509                LLVMTypeRef scalar_type = LLVMFloatTypeInContext(gallivm->context);
510                LLVMValueRef inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, LLVMPointerType(scalar_type, 0), "");
511 
512                if (bit_size == 64)
513                   index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
514                                                      indir_index, 4, idx + 1, true);
515 
516                /* Gather values from the input register array */
517                result[i] = build_gather(bld_base, &bld_base->base, scalar_type, inputs_array, index_vec, NULL, index_vec2);
518             } else {
519                if (bld->indirects & nir_var_shader_in) {
520                   LLVMValueRef lindex = lp_build_const_int32(gallivm,
521                                                              comp_loc * 4 + idx);
522                   LLVMValueRef input_ptr = lp_build_pointer_get2(gallivm->builder,
523                                                                  bld->bld_base.base.vec_type,
524                                                                  bld->inputs_array, lindex);
525                   if (bit_size == 64) {
526                      LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
527                                                                  comp_loc * 4 + (idx + 1));
528                      LLVMValueRef input_ptr2 = lp_build_pointer_get2(gallivm->builder,
529                                                                      bld->bld_base.base.vec_type,
530                                                                      bld->inputs_array, lindex2);
531                      result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
532                   } else {
533                      result[i] = input_ptr;
534                   }
535                } else {
536                   if (bit_size == 64) {
537                      LLVMValueRef tmp[2];
538                      tmp[0] = bld->inputs[comp_loc][idx];
539                      tmp[1] = bld->inputs[comp_loc][idx + 1];
540                      result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
541                   } else {
542                      result[i] = bld->inputs[comp_loc][idx];
543                   }
544                }
545             }
546          }
547       }
548       break;
549    case nir_var_shader_out:
550       if (bld->fs_iface && bld->fs_iface->fb_fetch) {
551          bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
552          return;
553       }
554       for (unsigned i = 0; i < num_components; i++) {
555          int idx = (i * dmul) + location_frac;
556          if (bld->tcs_iface) {
557             LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
558             LLVMValueRef attrib_index_val;
559             LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
560 
561             if (indir_index)
562                attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
563             else
564                attrib_index_val = lp_build_const_int32(gallivm, location);
565 
566             result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
567                                                           indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
568                                                           (indir_index && !var->data.compact) ? true : false, attrib_index_val,
569                                                           (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
570             if (bit_size == 64) {
571                LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
572                LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
573                                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
574                                                                         indir_index ? true : false, attrib_index_val,
575                                                                         false, swizzle_index_val, 0);
576                result[i] = emit_fetch_64bit(bld_base, result[i], result2);
577             }
578          }
579       }
580       break;
581    default:
582       break;
583    }
584 }
585 
emit_store_chan(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned bit_size,unsigned location,unsigned comp,unsigned chan,LLVMValueRef dst)586 static void emit_store_chan(struct lp_build_nir_context *bld_base,
587                             nir_variable_mode deref_mode,
588                             unsigned bit_size,
589                             unsigned location, unsigned comp,
590                             unsigned chan,
591                             LLVMValueRef dst)
592 {
593    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
594    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
595    struct lp_build_context *float_bld = &bld_base->base;
596 
597    if (bit_size == 64) {
598       chan *= 2;
599       chan += comp;
600       if (chan >= 4) {
601          chan -= 4;
602          location++;
603       }
604       emit_store_64bit_chan(bld_base, bld->outputs[location][chan],
605                             bld->outputs[location][chan + 1], dst);
606    } else {
607       dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
608       lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
609                          bld->outputs[location][chan + comp]);
610    }
611 }
612 
emit_store_tcs_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)613 static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
614                                 bool is_compact,
615                                 unsigned bit_size,
616                                 unsigned location,
617                                 unsigned const_index,
618                                 LLVMValueRef indir_vertex_index,
619                                 LLVMValueRef indir_index,
620                                 unsigned comp,
621                                 unsigned chan,
622                                 LLVMValueRef chan_val)
623 {
624    struct gallivm_state *gallivm = bld_base->base.gallivm;
625    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
626    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
627    unsigned swizzle = chan;
628    if (bit_size == 64) {
629       swizzle *= 2;
630       swizzle += comp;
631       if (swizzle >= 4) {
632          swizzle -= 4;
633          location++;
634       }
635    } else
636       swizzle += comp;
637    LLVMValueRef attrib_index_val;
638    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
639 
640    if (indir_index) {
641       if (is_compact) {
642          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
643          attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
644       } else
645          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
646    } else
647       attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
648    LLVMValueRef exec_mask = mask_vec(bld_base);
649    if (bit_size == 64) {
650       LLVMValueRef split_vals[2];
651       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
652       emit_store_64bit_split(bld_base, chan_val, split_vals);
653       if (bld->mesh_iface) {
654          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
655                                            indir_vertex_index ? true : false,
656                                            indir_vertex_index,
657                                            indir_index ? true : false,
658                                            attrib_index_val,
659                                            false, swizzle_index_val,
660                                            split_vals[0], exec_mask);
661          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
662                                            indir_vertex_index ? true : false,
663                                            indir_vertex_index,
664                                            indir_index ? true : false,
665                                            attrib_index_val,
666                                            false, swizzle_index_val2,
667                                            split_vals[1], exec_mask);
668       } else {
669          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
670                                            indir_vertex_index ? true : false,
671                                            indir_vertex_index,
672                                            indir_index ? true : false,
673                                            attrib_index_val,
674                                            false, swizzle_index_val,
675                                            split_vals[0], exec_mask);
676          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
677                                            indir_vertex_index ? true : false,
678                                            indir_vertex_index,
679                                            indir_index ? true : false,
680                                            attrib_index_val,
681                                            false, swizzle_index_val2,
682                                            split_vals[1], exec_mask);
683       }
684    } else {
685       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
686       if (bld->mesh_iface) {
687          bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
688                                            indir_vertex_index ? true : false,
689                                            indir_vertex_index,
690                                            indir_index && !is_compact ? true : false,
691                                            attrib_index_val,
692                                            indir_index && is_compact ? true : false,
693                                            swizzle_index_val,
694                                            chan_val, exec_mask);
695       } else {
696          bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
697                                            indir_vertex_index ? true : false,
698                                            indir_vertex_index,
699                                            indir_index && !is_compact ? true : false,
700                                            attrib_index_val,
701                                            indir_index && is_compact ? true : false,
702                                            swizzle_index_val,
703                                            chan_val, exec_mask);
704       }
705    }
706 }
707 
emit_store_mesh_chan(struct lp_build_nir_context * bld_base,bool is_compact,unsigned bit_size,unsigned location,unsigned const_index,LLVMValueRef indir_vertex_index,LLVMValueRef indir_index,unsigned comp,unsigned chan,LLVMValueRef chan_val)708 static void emit_store_mesh_chan(struct lp_build_nir_context *bld_base,
709                                  bool is_compact,
710                                  unsigned bit_size,
711                                  unsigned location,
712                                  unsigned const_index,
713                                  LLVMValueRef indir_vertex_index,
714                                  LLVMValueRef indir_index,
715                                  unsigned comp,
716                                  unsigned chan,
717                                  LLVMValueRef chan_val)
718 {
719    struct gallivm_state *gallivm = bld_base->base.gallivm;
720    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
721    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
722    unsigned swizzle = chan;
723    if (bit_size == 64) {
724       swizzle += const_index;
725       swizzle *= 2;
726       swizzle += comp;
727       if (swizzle >= 4) {
728          swizzle -= 4;
729          location++;
730       }
731    } else
732       swizzle += comp;
733    LLVMValueRef attrib_index_val;
734    LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);
735 
736    if (indir_index) {
737       if (is_compact) {
738          swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
739          attrib_index_val = lp_build_const_int32(gallivm, location);
740       } else
741          attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
742    } else
743       attrib_index_val = lp_build_const_int32(gallivm, location + const_index);
744    LLVMValueRef exec_mask = mask_vec(bld_base);
745    if (bit_size == 64) {
746       LLVMValueRef split_vals[2];
747       LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
748       emit_store_64bit_split(bld_base, chan_val, split_vals);
749       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
750                                          indir_vertex_index ? true : false,
751                                          indir_vertex_index,
752                                          indir_index ? true : false,
753                                          attrib_index_val,
754                                          false, swizzle_index_val,
755                                          split_vals[0], exec_mask);
756       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
757                                          indir_vertex_index ? true : false,
758                                          indir_vertex_index,
759                                          indir_index ? true : false,
760                                          attrib_index_val,
761                                          false, swizzle_index_val2,
762                                          split_vals[1], exec_mask);
763    } else {
764       chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
765       bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
766                                          indir_vertex_index ? true : false,
767                                          indir_vertex_index,
768                                          indir_index && !is_compact ? true : false,
769                                          attrib_index_val,
770                                          indir_index && is_compact ? true : false,
771                                          swizzle_index_val,
772                                          chan_val, exec_mask);
773    }
774 }
775 
emit_store_var(struct lp_build_nir_context * bld_base,nir_variable_mode deref_mode,unsigned num_components,unsigned bit_size,nir_variable * var,unsigned writemask,LLVMValueRef indir_vertex_index,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef dst)776 static void emit_store_var(struct lp_build_nir_context *bld_base,
777                            nir_variable_mode deref_mode,
778                            unsigned num_components,
779                            unsigned bit_size,
780                            nir_variable *var,
781                            unsigned writemask,
782                            LLVMValueRef indir_vertex_index,
783                            unsigned const_index,
784                            LLVMValueRef indir_index,
785                            LLVMValueRef dst)
786 {
787    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
788    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
789    switch (deref_mode) {
790    case nir_var_shader_out: {
791       unsigned location = var->data.driver_location;
792       unsigned comp = var->data.location_frac;
793       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
794          if (var->data.location == FRAG_RESULT_STENCIL)
795             comp = 1;
796          else if (var->data.location == FRAG_RESULT_DEPTH)
797             comp = 2;
798       }
799 
800       if (var->data.compact) {
801          location += const_index / 4;
802          comp += const_index % 4;
803          const_index = 0;
804       }
805 
806       for (unsigned chan = 0; chan < num_components; chan++) {
807          if (writemask & (1u << chan)) {
808             LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
809             if (bld->mesh_iface) {
810                emit_store_mesh_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
811             } else if (bld->tcs_iface) {
812                emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
813             } else
814                emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
815          }
816       }
817       break;
818    }
819    default:
820       break;
821    }
822 }
823 
824 /**
825  * Returns the address of the given constant array index and channel in a
826  * nir register.
827  */
reg_chan_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,LLVMValueRef reg_storage,int array_index,int chan)828 static LLVMValueRef reg_chan_pointer(struct lp_build_nir_context *bld_base,
829                                            struct lp_build_context *reg_bld,
830                                            const nir_intrinsic_instr *decl,
831                                            LLVMValueRef reg_storage,
832                                            int array_index, int chan)
833 {
834    struct gallivm_state *gallivm = bld_base->base.gallivm;
835    int nc = nir_intrinsic_num_components(decl);
836    int num_array_elems = nir_intrinsic_num_array_elems(decl);
837 
838    LLVMTypeRef chan_type = reg_bld->vec_type;
839    if (nc > 1)
840       chan_type = LLVMArrayType(chan_type, nc);
841 
842    if (num_array_elems > 0) {
843       LLVMTypeRef array_type = LLVMArrayType(chan_type, num_array_elems);
844       reg_storage = lp_build_array_get_ptr2(gallivm, array_type, reg_storage,
845                                             lp_build_const_int32(gallivm, array_index));
846    }
847    if (nc > 1) {
848       reg_storage = lp_build_array_get_ptr2(gallivm, chan_type, reg_storage,
849                                             lp_build_const_int32(gallivm, chan));
850    }
851 
852    return reg_storage;
853 }
854 
emit_load_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage)855 static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
856                                   struct lp_build_context *reg_bld,
857                                   const nir_intrinsic_instr *decl,
858                                   unsigned base,
859                                   LLVMValueRef indir_src,
860                                   LLVMValueRef reg_storage)
861 {
862    struct gallivm_state *gallivm = bld_base->base.gallivm;
863    LLVMBuilderRef builder = gallivm->builder;
864    int nc = nir_intrinsic_num_components(decl);
865    int num_array_elems = nir_intrinsic_num_array_elems(decl);
866    LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };
867    struct lp_build_context *uint_bld = &bld_base->uint_bld;
868    if (indir_src != NULL) {
869       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
870       LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
871       indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
872       indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
873       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
874       for (unsigned i = 0; i < nc; i++) {
875          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
876          vals[i] = build_gather(bld_base, reg_bld, reg_bld->elem_type, reg_storage, indirect_offset, NULL, NULL);
877       }
878    } else {
879       for (unsigned i = 0; i < nc; i++) {
880          vals[i] = LLVMBuildLoad2(builder, reg_bld->vec_type,
881                                   reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
882                                                    base, i), "");
883       }
884    }
885    return nc == 1 ? vals[0] : lp_nir_array_build_gather_values(builder, vals, nc);
886 }
887 
emit_store_reg(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,unsigned writemask,unsigned base,LLVMValueRef indir_src,LLVMValueRef reg_storage,LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])888 static void emit_store_reg(struct lp_build_nir_context *bld_base,
889                            struct lp_build_context *reg_bld,
890                            const nir_intrinsic_instr *decl,
891                            unsigned writemask,
892                            unsigned base,
893                            LLVMValueRef indir_src,
894                            LLVMValueRef reg_storage,
895                            LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
896 {
897    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
898    struct gallivm_state *gallivm = bld_base->base.gallivm;
899    LLVMBuilderRef builder = gallivm->builder;
900    struct lp_build_context *uint_bld = &bld_base->uint_bld;
901    int nc = nir_intrinsic_num_components(decl);
902    int num_array_elems = nir_intrinsic_num_array_elems(decl);
903    if (indir_src != NULL) {
904       LLVMValueRef indirect_val = lp_build_const_int_vec(gallivm, uint_bld->type, base);
905       LLVMValueRef max_index = lp_build_const_int_vec(gallivm, uint_bld->type, num_array_elems - 1);
906       indirect_val = LLVMBuildAdd(builder, indirect_val, indir_src, "");
907       indirect_val = lp_build_min(uint_bld, indirect_val, max_index);
908       reg_storage = LLVMBuildBitCast(builder, reg_storage, LLVMPointerType(reg_bld->elem_type, 0), "");
909       for (unsigned i = 0; i < nc; i++) {
910          if (!(writemask & (1 << i)))
911             continue;
912          LLVMValueRef indirect_offset = get_soa_array_offsets(uint_bld, indirect_val, nc, i, true);
913          dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
914          emit_mask_scatter(bld, reg_storage, indirect_offset, dst[i], &bld->exec_mask);
915       }
916       return;
917    }
918 
919    for (unsigned i = 0; i < nc; i++) {
920       if (!(writemask & (1 << i)))
921          continue;
922       dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
923       lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i],
924                          reg_chan_pointer(bld_base, reg_bld, decl, reg_storage,
925                                           base, i));
926    }
927 }
928 
emit_load_kernel_arg(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned offset_bit_size,bool offset_is_uniform,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])929 static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
930                                  unsigned nc,
931                                  unsigned bit_size,
932                                  unsigned offset_bit_size,
933                                  bool offset_is_uniform,
934                                  LLVMValueRef offset,
935                                  LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
936 {
937    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
938    struct gallivm_state *gallivm = bld_base->base.gallivm;
939    LLVMBuilderRef builder = gallivm->builder;
940    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
941    LLVMValueRef kernel_args_ptr = bld->kernel_args_ptr;
942    unsigned size_shift = bit_size_to_shift_size(bit_size);
943    struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
944    if (size_shift)
945       offset = lp_build_shr(bld_offset, offset, lp_build_const_int_vec(gallivm, bld_offset->type, size_shift));
946 
947    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
948    kernel_args_ptr = LLVMBuildBitCast(builder, kernel_args_ptr, ptr_type, "");
949 
950    if (offset_is_uniform) {
951       offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
952 
953       for (unsigned c = 0; c < nc; c++) {
954          LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, offset_bit_size == 64 ? lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c), "");
955 
956          LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type, kernel_args_ptr, this_offset);
957          result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
958       }
959    } else {
960       unreachable("load_kernel_arg must have a uniform offset.");
961    }
962 }
963 
global_addr_to_ptr(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned bit_size)964 static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
965 {
966    LLVMBuilderRef builder = gallivm->builder;
967    switch (bit_size) {
968    case 8:
969       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
970       break;
971    case 16:
972       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), "");
973       break;
974    case 32:
975    default:
976       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
977       break;
978    case 64:
979       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), "");
980       break;
981    }
982    return addr_ptr;
983 }
984 
global_addr_to_ptr_vec(struct gallivm_state * gallivm,LLVMValueRef addr_ptr,unsigned length,unsigned bit_size)985 static LLVMValueRef global_addr_to_ptr_vec(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned length, unsigned bit_size)
986 {
987    LLVMBuilderRef builder = gallivm->builder;
988    switch (bit_size) {
989    case 8:
990       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), length), "");
991       break;
992    case 16:
993       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt16TypeInContext(gallivm->context), 0), length), "");
994       break;
995    case 32:
996    default:
997       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), length), "");
998       break;
999    case 64:
1000       addr_ptr = LLVMBuildIntToPtr(builder, addr_ptr, LLVMVectorType(LLVMPointerType(LLVMInt64TypeInContext(gallivm->context), 0), length), "");
1001       break;
1002    }
1003    return addr_ptr;
1004 }
1005 
lp_vec_add_offset_ptr(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef ptr,LLVMValueRef offset)1006 static LLVMValueRef lp_vec_add_offset_ptr(struct lp_build_nir_context *bld_base,
1007                                           unsigned bit_size,
1008                                           LLVMValueRef ptr,
1009                                           LLVMValueRef offset)
1010 {
1011    unsigned pointer_size = 8 * sizeof(void *);
1012    struct gallivm_state *gallivm = bld_base->base.gallivm;
1013    LLVMBuilderRef builder = gallivm->builder;
1014    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1015    struct lp_build_context *ptr_bld = get_int_bld(bld_base, true, pointer_size);
1016    LLVMValueRef result = LLVMBuildPtrToInt(builder, ptr, ptr_bld->vec_type, "");
1017    if (pointer_size == 64)
1018       offset = LLVMBuildZExt(builder, offset, ptr_bld->vec_type, "");
1019    result = LLVMBuildAdd(builder, offset, result, "");
1020    return global_addr_to_ptr_vec(gallivm, result, uint_bld->type.length, bit_size);
1021 }
1022 
emit_load_global(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,unsigned addr_bit_size,bool offset_is_uniform,LLVMValueRef addr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1023 static void emit_load_global(struct lp_build_nir_context *bld_base,
1024                              unsigned nc,
1025                              unsigned bit_size,
1026                              unsigned addr_bit_size,
1027                              bool offset_is_uniform,
1028                              LLVMValueRef addr,
1029                              LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1030 {
1031    struct gallivm_state *gallivm = bld_base->base.gallivm;
1032    LLVMBuilderRef builder = gallivm->builder;
1033    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1034    struct lp_build_context *res_bld;
1035    LLVMValueRef exec_mask = mask_vec(bld_base);
1036 
1037    res_bld = get_int_bld(bld_base, true, bit_size);
1038 
1039    /* Note, we don't use first_active_invocation here, since we aren't
1040     * guaranteed that there is actually an active invocation.
1041     */
1042    if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
1043       /* If the offset is uniform, then use the address from invocation 0 to
1044        * load, and broadcast to all invocations.
1045        */
1046       LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1047                                                       lp_build_const_int32(gallivm, 0), "");
1048       addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, bit_size);
1049 
1050       for (unsigned c = 0; c < nc; c++) {
1051          LLVMValueRef scalar = lp_build_pointer_get2(builder, res_bld->elem_type,
1052                                                      addr_ptr, lp_build_const_int32(gallivm, c));
1053          outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
1054       }
1055       return;
1056    }
1057 
1058    for (unsigned c = 0; c < nc; c++) {
1059       LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1060 
1061       outval[c] = lp_build_masked_gather(gallivm, res_bld->type.length,
1062                                          bit_size,
1063                                          res_bld->vec_type,
1064                                          lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset),
1065                                          exec_mask);
1066       outval[c] = LLVMBuildBitCast(builder, outval[c], res_bld->vec_type, "");
1067    }
1068 }
1069 
emit_store_global(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,unsigned addr_bit_size,LLVMValueRef addr,LLVMValueRef dst)1070 static void emit_store_global(struct lp_build_nir_context *bld_base,
1071                               unsigned writemask,
1072                               unsigned nc, unsigned bit_size,
1073                               unsigned addr_bit_size,
1074                               LLVMValueRef addr,
1075                               LLVMValueRef dst)
1076 {
1077    struct gallivm_state *gallivm = bld_base->base.gallivm;
1078    LLVMBuilderRef builder = gallivm->builder;
1079    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1080    LLVMValueRef exec_mask = mask_vec(bld_base);
1081 
1082    for (unsigned c = 0; c < nc; c++) {
1083       if (!(writemask & (1u << c)))
1084          continue;
1085       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1086       LLVMValueRef chan_offset = lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
1087 
1088       struct lp_build_context *out_bld = get_int_bld(bld_base, false, bit_size);
1089       val = LLVMBuildBitCast(builder, val, out_bld->vec_type, "");
1090       lp_build_masked_scatter(gallivm, out_bld->type.length, bit_size,
1091                               lp_vec_add_offset_ptr(bld_base, bit_size,
1092                                                     addr, chan_offset),
1093                               val, exec_mask);
1094    }
1095 }
1096 
emit_atomic_global(struct lp_build_nir_context * bld_base,nir_atomic_op nir_op,unsigned addr_bit_size,unsigned val_bit_size,LLVMValueRef addr,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1097 static void emit_atomic_global(struct lp_build_nir_context *bld_base,
1098                                nir_atomic_op nir_op,
1099                                unsigned addr_bit_size,
1100                                unsigned val_bit_size,
1101                                LLVMValueRef addr,
1102                                LLVMValueRef val, LLVMValueRef val2,
1103                                LLVMValueRef *result)
1104 {
1105    struct gallivm_state *gallivm = bld_base->base.gallivm;
1106    LLVMBuilderRef builder = gallivm->builder;
1107    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1108    bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
1109    struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
1110    if (is_flt)
1111       val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");
1112 
1113    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1114                                            atom_bld->vec_type, "");
1115    LLVMValueRef exec_mask = mask_vec(bld_base);
1116    struct lp_build_loop_state loop_state;
1117    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
1118 
1119    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1120                                                     loop_state.counter, "");
1121    value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atom_bld->elem_type, "");
1122 
1123    LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
1124                                                    loop_state.counter, "");
1125    addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
1126    struct lp_build_if_state ifthen;
1127    LLVMValueRef cond, temp_res;
1128    LLVMValueRef scalar;
1129    cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1130    cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
1131    lp_build_if(&ifthen, gallivm, cond);
1132 
1133    addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
1134    if (val2 != NULL /* compare-and-swap */) {
1135       LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1136                                                          loop_state.counter, "");
1137       cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
1138       scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
1139                                       cas_src_ptr,
1140                                       LLVMAtomicOrderingSequentiallyConsistent,
1141                                       LLVMAtomicOrderingSequentiallyConsistent,
1142                                       false);
1143       scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1144    } else {
1145       scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
1146                                   addr_ptr, value_ptr,
1147                                   LLVMAtomicOrderingSequentiallyConsistent,
1148                                   false);
1149    }
1150    temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1151    temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
1152    LLVMBuildStore(builder, temp_res, atom_res);
1153    lp_build_else(&ifthen);
1154    temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
1155    LLVMValueRef zero_val = lp_build_zero_bits(gallivm, val_bit_size, is_flt);
1156    temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
1157    LLVMBuildStore(builder, temp_res, atom_res);
1158    lp_build_endif(&ifthen);
1159    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
1160                           NULL, LLVMIntUGE);
1161    *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
1162 }
1163 
1164 /* Returns a boolean for whether the offset is in range of the given limit for
1165  * SSBO/UBO dereferences.
1166  */
1167 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1168 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1169                    LLVMValueRef offset,
1170                    LLVMValueRef limit)
1171 {
1172    struct gallivm_state *gallivm = bld_base->base.gallivm;
1173    LLVMBuilderRef builder = gallivm->builder;
1174 
1175    LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1176    LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1177    LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1178    return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1179 }
1180 
1181 static LLVMValueRef
build_resource_to_scalar(struct lp_build_nir_context * bld_base,LLVMValueRef resource)1182 build_resource_to_scalar(struct lp_build_nir_context *bld_base, LLVMValueRef resource)
1183 {
1184    struct gallivm_state *gallivm = bld_base->base.gallivm;
1185 
1186    LLVMValueRef invocation = first_active_invocation(bld_base);
1187 
1188    LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, resource, 0, "");
1189    set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1190 
1191    LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, resource, 1, "");
1192    binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1193 
1194    LLVMValueRef components[2] = { set, binding };
1195    return lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1196 }
1197 
emit_load_ubo(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool offset_is_uniform,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1198 static void emit_load_ubo(struct lp_build_nir_context *bld_base,
1199                           unsigned nc,
1200                           unsigned bit_size,
1201                           bool offset_is_uniform,
1202                           LLVMValueRef index,
1203                           LLVMValueRef offset,
1204                           LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1205 {
1206    if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind)
1207       index = build_resource_to_scalar(bld_base, index);
1208 
1209    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1210    struct gallivm_state *gallivm = bld_base->base.gallivm;
1211    LLVMBuilderRef builder = gallivm->builder;
1212    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1213    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
1214    LLVMValueRef consts_ptr = lp_llvm_buffer_base(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1215    LLVMValueRef num_consts = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
1216    unsigned size_shift = bit_size_to_shift_size(bit_size);
1217    if (size_shift)
1218       offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));
1219 
1220    LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
1221    consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");
1222 
1223    if (offset_is_uniform) {
1224       offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
1225       struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
1226       switch (bit_size) {
1227       case 8:
1228          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
1229          break;
1230       case 16:
1231          num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1232          break;
1233       case 64:
1234          num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
1235          break;
1236       default: break;
1237       }
1238       for (unsigned c = 0; c < nc; c++) {
1239          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1240 
1241          LLVMValueRef scalar;
1242          /* If loading outside the UBO, we need to skip the load and read 0 instead. */
1243          LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1244          LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1245          LLVMBuildStore(builder, zero, res_store);
1246 
1247          struct lp_build_if_state ifthen;
1248          lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
1249          LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type,
1250                                                        consts_ptr, chan_offset), res_store);
1251          lp_build_endif(&ifthen);
1252 
1253          scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1254 
1255          result[c] = lp_build_broadcast_scalar(load_bld, scalar);
1256       }
1257    } else {
1258       LLVMValueRef overflow_mask;
1259 
1260       num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
1261       if (bit_size == 64)
1262          num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
1263       else if (bit_size == 16)
1264          num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
1265       else if (bit_size == 8)
1266          num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);
1267 
1268       for (unsigned c = 0; c < nc; c++) {
1269          LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
1270          overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
1271                                           this_offset, num_consts);
1272          result[c] = build_gather(bld_base, bld_broad, bld_broad->elem_type, consts_ptr, this_offset, overflow_mask, NULL);
1273       }
1274    }
1275 }
1276 
1277 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1278 emit_load_const(struct lp_build_nir_context *bld_base,
1279                 const nir_load_const_instr *instr,
1280                 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1281 {
1282    struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1283    const unsigned bits = instr->def.bit_size;
1284 
1285    for (unsigned i = 0; i < instr->def.num_components; i++) {
1286      outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1287                                         bits == 32 ? instr->value[i].u32
1288                                                    : instr->value[i].u64);
1289    }
1290    for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1291       outval[i] = NULL;
1292    }
1293 }
1294 
1295 /**
1296  * Get the base address of SSBO[@index] for the @invocation channel, returning
1297  * the address and also the bounds (in units of the bit_size).
1298  */
1299 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1300 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1301                   unsigned bit_size,
1302                   LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1303 {
1304    struct gallivm_state *gallivm = bld_base->base.gallivm;
1305    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1306    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1307 
1308    LLVMValueRef ssbo_idx;
1309    LLVMValueRef buffers;
1310    uint32_t buffers_limit;
1311    if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) {
1312       LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, index, 0, "");
1313       set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1314 
1315       LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, index, 1, "");
1316       binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1317 
1318       LLVMValueRef components[2] = { set, binding };
1319       ssbo_idx = lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1320 
1321       buffers = bld->consts_ptr;
1322       buffers_limit = LP_MAX_TGSI_CONST_BUFFERS;
1323    } else {
1324       ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1325 
1326       buffers = bld->ssbo_ptr;
1327       buffers_limit = LP_MAX_TGSI_SHADER_BUFFERS;
1328    }
1329 
1330    LLVMValueRef ssbo_size_ptr = lp_llvm_buffer_num_elements(gallivm, buffers, ssbo_idx, buffers_limit);
1331    LLVMValueRef ssbo_ptr = lp_llvm_buffer_base(gallivm, buffers, ssbo_idx, buffers_limit);
1332    if (bounds)
1333       *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1334 
1335    return ssbo_ptr;
1336 }
1337 
1338 static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * mem_bld,unsigned bit_size,bool payload,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1339 mem_access_base_pointer(struct lp_build_nir_context *bld_base,
1340                         struct lp_build_context *mem_bld,
1341                         unsigned bit_size, bool payload,
1342                         LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1343 {
1344    struct gallivm_state *gallivm = bld_base->base.gallivm;
1345    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1346    LLVMValueRef ptr;
1347 
1348    if (index) {
1349       ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
1350    } else {
1351       if (payload) {
1352          ptr = bld->payload_ptr;
1353          ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
1354          ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
1355          ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
1356       }
1357       else
1358          ptr = bld->shared_ptr;
1359       *bounds = NULL;
1360    }
1361 
1362    /* Cast it to the pointer type of the access this instruction is doing. */
1363    if (bit_size == 32 && !mem_bld->type.floating)
1364       return ptr;
1365    else
1366       return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
1367 }
1368 
emit_load_mem(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1369 static void emit_load_mem(struct lp_build_nir_context *bld_base,
1370                           unsigned nc,
1371                           unsigned bit_size,
1372                           bool index_and_offset_are_uniform,
1373                           bool payload,
1374                           LLVMValueRef index,
1375                           LLVMValueRef offset,
1376                           LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1377 {
1378    struct gallivm_state *gallivm = bld_base->base.gallivm;
1379    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1380    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1381    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1382    struct lp_build_context *load_bld;
1383    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1384 
1385    load_bld = get_int_bld(bld_base, true, bit_size);
1386 
1387    offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");
1388 
1389    /* If the address is uniform, then use the address from the first active
1390     * invocation 0 to load, and broadcast to all invocations.  We can't do
1391     * computed first active invocation for shared accesses (index == NULL),
1392     * though, since those don't do bounds checking and we could use an invalid
1393     * offset if exec_mask == 0.
1394     */
1395    if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) {
1396       LLVMValueRef ssbo_limit;
1397       LLVMValueRef first_active = first_active_invocation(bld_base);
1398       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1399                                                      first_active, &ssbo_limit);
1400 
1401       offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, "");
1402 
1403       for (unsigned c = 0; c < nc; c++) {
1404          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1405 
1406          LLVMValueRef scalar;
1407          /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
1408          if (ssbo_limit) {
1409             LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1410             LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
1411             LLVMBuildStore(builder, zero, res_store);
1412 
1413             struct lp_build_if_state ifthen;
1414             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1415             LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store);
1416             lp_build_endif(&ifthen);
1417 
1418             scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
1419          } else {
1420             scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset);
1421          }
1422 
1423          outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
1424       }
1425       return;
1426    }
1427 
1428    /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */
1429 
1430    LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
1431    for (unsigned c = 0; c < nc; c++)
1432       result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");
1433 
1434    LLVMValueRef exec_mask = mask_vec(bld_base);
1435    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1436    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1437       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1438       LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
1439 
1440       struct lp_build_if_state exec_ifthen;
1441       lp_build_if(&exec_ifthen, gallivm, loop_cond);
1442 
1443       LLVMValueRef ssbo_limit;
1444       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
1445                                                      counter, &ssbo_limit);
1446 
1447       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1448 
1449       for (unsigned c = 0; c < nc; c++) {
1450          LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1451          LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1452          if (ssbo_limit) {
1453             LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1454             do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1455          }
1456 
1457          struct lp_build_if_state ifthen;
1458          LLVMValueRef fetch_cond, temp_res;
1459 
1460          fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1461 
1462          lp_build_if(&ifthen, gallivm, fetch_cond);
1463          LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index);
1464 
1465          temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1466          temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1467          LLVMBuildStore(builder, temp_res, result[c]);
1468          lp_build_else(&ifthen);
1469          temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
1470          LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
1471          temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1472          LLVMBuildStore(builder, temp_res, result[c]);
1473          lp_build_endif(&ifthen);
1474       }
1475 
1476       lp_build_endif(&exec_ifthen);
1477    }
1478    for (unsigned c = 0; c < nc; c++)
1479       outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");
1480 
1481 }
1482 
emit_store_mem(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,bool index_and_offset_are_uniform,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef dst)1483 static void emit_store_mem(struct lp_build_nir_context *bld_base,
1484                            unsigned writemask,
1485                            unsigned nc,
1486                            unsigned bit_size,
1487                            bool index_and_offset_are_uniform,
1488                            bool payload,
1489                            LLVMValueRef index,
1490                            LLVMValueRef offset,
1491                            LLVMValueRef dst)
1492 {
1493    struct gallivm_state *gallivm = bld_base->base.gallivm;
1494    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1495    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1496    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1497    struct lp_build_context *store_bld;
1498    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1499    store_bld = get_int_bld(bld_base, true, bit_size);
1500 
1501    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1502 
1503    /* If the address is uniform, then just store the value from the first
1504     * channel instead of making LLVM unroll the invocation loop.  Note that we
1505     * don't use first_active_uniform(), since we aren't guaranteed that there is
1506     * actually an active invocation.
1507     */
1508    if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
1509       LLVMValueRef ssbo_limit;
1510       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
1511                                                      lp_build_const_int32(gallivm, 0), &ssbo_limit);
1512 
1513       offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");
1514 
1515       for (unsigned c = 0; c < nc; c++) {
1516          if (!(writemask & (1u << c)))
1517             continue;
1518 
1519          /* Pick out invocation 0's value. */
1520          LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1521          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1522                                                          lp_build_const_int32(gallivm, 0), "");
1523          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1524 
1525          LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");
1526 
1527          /* If storing outside the SSBO, we need to skip the store instead. */
1528          if (ssbo_limit) {
1529             struct lp_build_if_state ifthen;
1530             lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
1531             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1532             lp_build_endif(&ifthen);
1533          } else {
1534             lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
1535          }
1536       }
1537       return;
1538    }
1539 
1540    LLVMValueRef exec_mask = mask_vec(bld_base);
1541    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1542    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1543       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1544       LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
1545 
1546       struct lp_build_if_state exec_ifthen;
1547       lp_build_if(&exec_ifthen, gallivm, loop_cond);
1548 
1549       LLVMValueRef ssbo_limit;
1550       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
1551                                                      counter, &ssbo_limit);
1552 
1553       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1554 
1555       for (unsigned c = 0; c < nc; c++) {
1556          if (!(writemask & (1u << c)))
1557             continue;
1558          LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
1559          LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
1560          LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);
1561 
1562          if (ssbo_limit) {
1563             LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
1564             do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
1565          }
1566 
1567          LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1568                                                           counter, "");
1569          value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
1570          struct lp_build_if_state ifthen;
1571          LLVMValueRef store_cond;
1572 
1573          store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
1574          lp_build_if(&ifthen, gallivm, store_cond);
1575          lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
1576          lp_build_endif(&ifthen);
1577       }
1578 
1579       lp_build_endif(&exec_ifthen);
1580    }
1581 }
1582 
1583 
emit_atomic_mem(struct lp_build_nir_context * bld_base,nir_atomic_op nir_op,uint32_t bit_size,bool payload,LLVMValueRef index,LLVMValueRef offset,LLVMValueRef val,LLVMValueRef val2,LLVMValueRef * result)1584 static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
1585                             nir_atomic_op nir_op,
1586                             uint32_t bit_size,
1587                             bool payload,
1588                             LLVMValueRef index, LLVMValueRef offset,
1589                             LLVMValueRef val, LLVMValueRef val2,
1590                             LLVMValueRef *result)
1591 {
1592    struct gallivm_state *gallivm = bld_base->base.gallivm;
1593    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1594    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
1595    struct lp_build_context *uint_bld = &bld_base->uint_bld;
1596    uint32_t shift_val = bit_size_to_shift_size(bit_size);
1597    bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
1598    struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);
1599 
1600    offset = lp_build_shr_imm(uint_bld, offset, shift_val);
1601    LLVMValueRef atom_res = lp_build_alloca(gallivm,
1602                                            atomic_bld->vec_type, "");
1603 
1604    LLVMValueRef exec_mask = mask_vec(bld_base);
1605    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
1606    for (unsigned i = 0; i < uint_bld->type.length; i++) {
1607       LLVMValueRef counter = lp_build_const_int32(gallivm, i);
1608       LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");
1609 
1610       struct lp_build_if_state exec_ifthen;
1611       lp_build_if(&exec_ifthen, gallivm, loop_cond);
1612 
1613       LLVMValueRef ssbo_limit;
1614       LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
1615                                                      counter, &ssbo_limit);
1616 
1617       LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");
1618 
1619       LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
1620       if (ssbo_limit) {
1621          LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
1622          do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
1623       }
1624 
1625       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
1626                                                        counter, "");
1627       value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");
1628 
1629       LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");
1630 
1631       struct lp_build_if_state ifthen;
1632       LLVMValueRef inner_cond, temp_res;
1633       LLVMValueRef scalar;
1634 
1635       inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
1636       lp_build_if(&ifthen, gallivm, inner_cond);
1637 
1638       if (val2 != NULL) {
1639          LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
1640                                                             counter, "");
1641          cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
1642          scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
1643                                          cas_src_ptr,
1644                                          LLVMAtomicOrderingSequentiallyConsistent,
1645                                          LLVMAtomicOrderingSequentiallyConsistent,
1646                                          false);
1647          scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
1648       } else {
1649          scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
1650                                      scalar_ptr, value_ptr,
1651                                      LLVMAtomicOrderingSequentiallyConsistent,
1652                                      false);
1653       }
1654       temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1655       temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
1656       LLVMBuildStore(builder, temp_res, atom_res);
1657       lp_build_else(&ifthen);
1658       temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1659       LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
1660       temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
1661       LLVMBuildStore(builder, temp_res, atom_res);
1662       lp_build_endif(&ifthen);
1663 
1664       lp_build_endif(&exec_ifthen);
1665    }
1666    *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
1667 }
1668 
emit_barrier(struct lp_build_nir_context * bld_base)1669 static void emit_barrier(struct lp_build_nir_context *bld_base)
1670 {
1671    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1672    struct gallivm_state * gallivm = bld_base->base.gallivm;
1673 
1674    LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1675 
1676    lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1677    LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1678 }
1679 
emit_get_ssbo_size(struct lp_build_nir_context * bld_base,LLVMValueRef index)1680 static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
1681                                        LLVMValueRef index)
1682 {
1683    struct lp_build_context *bld_broad = &bld_base->uint_bld;
1684 
1685    LLVMValueRef size;
1686    ssbo_base_pointer(bld_base, 8, index, first_active_invocation(bld_base), &size);
1687 
1688    return lp_build_broadcast_scalar(bld_broad, size);
1689 }
1690 
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1691 static void emit_image_op(struct lp_build_nir_context *bld_base,
1692                           struct lp_img_params *params)
1693 {
1694    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1695    struct gallivm_state *gallivm = bld_base->base.gallivm;
1696 
1697    params->type = bld_base->base.type;
1698    params->resources_type = bld->resources_type;
1699    params->resources_ptr = bld->resources_ptr;
1700    params->thread_data_type = bld->thread_data_type;
1701    params->thread_data_ptr = bld->thread_data_ptr;
1702    params->exec_mask = mask_vec(bld_base);
1703 
1704    if (params->image_index_offset)
1705       params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1706                                                            first_active_invocation(bld_base), "");
1707 
1708    if (params->resource)
1709       params->resource = build_resource_to_scalar(bld_base, params->resource);
1710 
1711    bld->image->emit_op(bld->image,
1712                        bld->bld_base.base.gallivm,
1713                        params);
1714 
1715 }
1716 
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1717 static void emit_image_size(struct lp_build_nir_context *bld_base,
1718                             struct lp_sampler_size_query_params *params)
1719 {
1720    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1721    struct gallivm_state *gallivm = bld_base->base.gallivm;
1722 
1723    params->int_type = bld_base->int_bld.type;
1724    params->resources_type = bld->resources_type;
1725    params->resources_ptr = bld->resources_ptr;
1726    if (params->texture_unit_offset)
1727       params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1728                                                             first_active_invocation(bld_base), "");
1729    bld->image->emit_size_query(bld->image,
1730                                bld->bld_base.base.gallivm,
1731                                params);
1732 
1733 }
1734 
init_var_slots(struct lp_build_nir_context * bld_base,nir_variable * var,unsigned sc)1735 static void init_var_slots(struct lp_build_nir_context *bld_base,
1736                            nir_variable *var, unsigned sc)
1737 {
1738    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1739    unsigned slots = glsl_count_attribute_slots(var->type, false) * 4;
1740 
1741    if (!bld->outputs)
1742      return;
1743    for (unsigned comp = sc; comp < slots + sc; comp++) {
1744       unsigned this_loc = var->data.driver_location + (comp / 4);
1745       unsigned this_chan = comp % 4;
1746 
1747       if (!bld->outputs[this_loc][this_chan])
1748          bld->outputs[this_loc][this_chan] = lp_build_alloca(bld_base->base.gallivm,
1749                                                              bld_base->base.vec_type, "output");
1750    }
1751 }
1752 
emit_var_decl(struct lp_build_nir_context * bld_base,nir_variable * var)1753 static void emit_var_decl(struct lp_build_nir_context *bld_base,
1754                           nir_variable *var)
1755 {
1756    unsigned sc = var->data.location_frac;
1757    switch (var->data.mode) {
1758    case nir_var_shader_out: {
1759       if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
1760          if (var->data.location == FRAG_RESULT_STENCIL)
1761             sc = 1;
1762          else if (var->data.location == FRAG_RESULT_DEPTH)
1763             sc = 2;
1764       }
1765       init_var_slots(bld_base, var, sc);
1766       break;
1767    }
1768    default:
1769       break;
1770    }
1771 }
1772 
emit_tex(struct lp_build_nir_context * bld_base,struct lp_sampler_params * params)1773 static void emit_tex(struct lp_build_nir_context *bld_base,
1774                      struct lp_sampler_params *params)
1775 {
1776    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1777    struct gallivm_state *gallivm = bld_base->base.gallivm;
1778 
1779    params->type = bld_base->base.type;
1780    params->resources_type = bld->resources_type;
1781    params->resources_ptr = bld->resources_ptr;
1782    params->thread_data_type = bld->thread_data_type;
1783    params->thread_data_ptr = bld->thread_data_ptr;
1784    params->exec_mask = mask_vec(bld_base);
1785 
1786    if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
1787       /* this is horrible but this can be dynamic */
1788       LLVMValueRef coords[5];
1789       LLVMValueRef *orig_texel_ptr;
1790       struct lp_build_context *uint_bld = &bld_base->uint_bld;
1791       LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
1792                                  LLVMGetUndef(bld_base->base.vec_type),
1793                                  LLVMGetUndef(bld_base->base.vec_type),
1794                                  LLVMGetUndef(bld_base->base.vec_type) };
1795       LLVMValueRef texel[4], orig_offset, orig_lod;
1796       unsigned i;
1797       orig_texel_ptr = params->texel;
1798       orig_lod = params->lod;
1799       for (i = 0; i < 5; i++) {
1800          coords[i] = params->coords[i];
1801       }
1802       orig_offset = params->texture_index_offset;
1803 
1804       for (unsigned v = 0; v < uint_bld->type.length; v++) {
1805          LLVMValueRef idx = lp_build_const_int32(gallivm, v);
1806          LLVMValueRef new_coords[5];
1807          for (i = 0; i < 5; i++) {
1808             new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
1809                                                     coords[i], idx, "");
1810          }
1811          params->coords = new_coords;
1812          params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
1813                                                                 orig_offset,
1814                                                                 idx, "");
1815          params->type = lp_elem_type(bld_base->base.type);
1816 
1817          if (orig_lod)
1818             params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
1819          params->texel = texel;
1820          bld->sampler->emit_tex_sample(bld->sampler,
1821                                        gallivm,
1822                                        params);
1823 
1824          for (i = 0; i < 4; i++) {
1825             result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
1826          }
1827       }
1828       for (i = 0; i < 4; i++) {
1829          orig_texel_ptr[i] = result[i];
1830       }
1831       return;
1832    }
1833 
1834    if (params->texture_index_offset) {
1835       params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
1836                                                              first_active_invocation(bld_base), "");
1837    }
1838 
1839    if (params->texture_resource)
1840       params->texture_resource = build_resource_to_scalar(bld_base, params->texture_resource);
1841 
1842    if (params->sampler_resource)
1843       params->sampler_resource = build_resource_to_scalar(bld_base, params->sampler_resource);
1844 
1845    params->type = bld_base->base.type;
1846    bld->sampler->emit_tex_sample(bld->sampler,
1847                                  bld->bld_base.base.gallivm,
1848                                  params);
1849 }
1850 
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1851 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1852                           struct lp_sampler_size_query_params *params)
1853 {
1854    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1855 
1856    params->int_type = bld_base->int_bld.type;
1857    params->resources_type = bld->resources_type;
1858    params->resources_ptr = bld->resources_ptr;
1859    if (params->texture_unit_offset)
1860       params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1861                                                              params->texture_unit_offset,
1862                                                              lp_build_const_int32(bld_base->base.gallivm, 0), "");
1863 
1864    params->exec_mask = mask_vec(bld_base);
1865    if (params->resource)
1866       params->resource = build_resource_to_scalar(bld_base, params->resource);
1867 
1868    bld->sampler->emit_size_query(bld->sampler,
1869                                  bld->bld_base.base.gallivm,
1870                                  params);
1871 }
1872 
get_local_invocation_index(struct lp_build_nir_soa_context * bld)1873 static LLVMValueRef get_local_invocation_index(struct lp_build_nir_soa_context *bld)
1874 {
1875    struct lp_build_nir_context *bld_base = &bld->bld_base;
1876    LLVMValueRef tmp, tmp2;
1877 
1878    tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[1]);
1879    tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[0]);
1880    tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1881    tmp = lp_build_mul(&bld_base->uint_bld, tmp, bld->system_values.thread_id[2]);
1882 
1883    tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, bld->system_values.thread_id[1]);
1884    tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1885    tmp = lp_build_add(&bld_base->uint_bld, tmp, bld->system_values.thread_id[0]);
1886    return tmp;
1887 }
1888 
emit_sysval_intrin(struct lp_build_nir_context * bld_base,nir_intrinsic_instr * instr,LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])1889 static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
1890                                nir_intrinsic_instr *instr,
1891                                LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
1892 {
1893    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1894    struct gallivm_state *gallivm = bld_base->base.gallivm;
1895    struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->def.bit_size);
1896    switch (instr->intrinsic) {
1897    case nir_intrinsic_load_instance_id:
1898       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
1899       break;
1900    case nir_intrinsic_load_base_instance:
1901       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
1902       break;
1903    case nir_intrinsic_load_base_vertex:
1904       result[0] = bld->system_values.basevertex;
1905       break;
1906    case nir_intrinsic_load_first_vertex:
1907       result[0] = bld->system_values.firstvertex;
1908       break;
1909    case nir_intrinsic_load_vertex_id:
1910       result[0] = bld->system_values.vertex_id;
1911       break;
1912    case nir_intrinsic_load_primitive_id:
1913       result[0] = bld->system_values.prim_id;
1914       break;
1915    case nir_intrinsic_load_workgroup_id: {
1916       LLVMValueRef tmp[3];
1917       for (unsigned i = 0; i < 3; i++) {
1918          tmp[i] = bld->system_values.block_id[i];
1919          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1920       }
1921       break;
1922    }
1923    case nir_intrinsic_load_local_invocation_id:
1924       for (unsigned i = 0; i < 3; i++)
1925          result[i] = bld->system_values.thread_id[i];
1926       break;
1927    case nir_intrinsic_load_local_invocation_index:
1928       result[0] = get_local_invocation_index(bld);
1929       break;
1930    case nir_intrinsic_load_num_workgroups: {
1931       LLVMValueRef tmp[3];
1932       for (unsigned i = 0; i < 3; i++) {
1933          tmp[i] = bld->system_values.grid_size[i];
1934          result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
1935       }
1936       break;
1937    }
1938    case nir_intrinsic_load_invocation_id:
1939       if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
1940          result[0] = bld->system_values.invocation_id;
1941       else
1942          result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
1943       break;
1944    case nir_intrinsic_load_front_face:
1945       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
1946       break;
1947    case nir_intrinsic_load_draw_id:
1948       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
1949       break;
1950    default:
1951       break;
1952    case nir_intrinsic_load_workgroup_size:
1953      for (unsigned i = 0; i < 3; i++)
1954        result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[i]);
1955      break;
1956    case nir_intrinsic_load_work_dim:
1957       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
1958       break;
1959    case nir_intrinsic_load_tess_coord:
1960       for (unsigned i = 0; i < 3; i++) {
1961 	 result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
1962       }
1963       break;
1964    case nir_intrinsic_load_tess_level_outer:
1965       for (unsigned i = 0; i < 4; i++)
1966          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
1967       break;
1968    case nir_intrinsic_load_tess_level_inner:
1969       for (unsigned i = 0; i < 2; i++)
1970          result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
1971       break;
1972    case nir_intrinsic_load_patch_vertices_in:
1973       result[0] = bld->system_values.vertices_in;
1974       break;
1975    case nir_intrinsic_load_sample_id:
1976       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
1977       break;
1978    case nir_intrinsic_load_sample_pos:
1979       for (unsigned i = 0; i < 2; i++) {
1980          LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
1981          idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
1982          LLVMValueRef val = lp_build_array_get2(gallivm, bld->system_values.sample_pos_type,
1983                                                 bld->system_values.sample_pos, idx);
1984          result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
1985       }
1986       break;
1987    case nir_intrinsic_load_sample_mask_in:
1988       result[0] = bld->system_values.sample_mask_in;
1989       break;
1990    case nir_intrinsic_load_view_index:
1991       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
1992       break;
1993    case nir_intrinsic_load_subgroup_invocation: {
1994       LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
1995       for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
1996          elems[i] = lp_build_const_int32(gallivm, i);
1997       result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
1998       break;
1999    }
2000    case nir_intrinsic_load_subgroup_id:
2001       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
2002       break;
2003    case nir_intrinsic_load_num_subgroups:
2004       result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
2005       break;
2006    }
2007 }
2008 
emit_helper_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef * dst)2009 static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
2010                                    LLVMValueRef *dst)
2011 {
2012    struct gallivm_state *gallivm = bld_base->base.gallivm;
2013    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2014    *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), lp_build_const_int_vec(gallivm, uint_bld->type, -1));
2015 }
2016 
bgnloop(struct lp_build_nir_context * bld_base)2017 static void bgnloop(struct lp_build_nir_context *bld_base)
2018 {
2019    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2020    lp_exec_bgnloop(&bld->exec_mask, true);
2021 }
2022 
endloop(struct lp_build_nir_context * bld_base)2023 static void endloop(struct lp_build_nir_context *bld_base)
2024 {
2025    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2026    lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
2027 }
2028 
lp_build_skip_branch(struct lp_build_nir_context * bld_base,bool flatten)2029 static void lp_build_skip_branch(struct lp_build_nir_context *bld_base, bool flatten)
2030 {
2031    if (flatten)
2032       return;
2033 
2034    struct gallivm_state *gallivm = bld_base->base.gallivm;
2035    LLVMBuilderRef builder = gallivm->builder;
2036 
2037    LLVMValueRef exec_mask = mask_vec(bld_base);
2038 
2039    LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2040    bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, bld_base->uint_bld.type.length), "");
2041    bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
2042 
2043    LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
2044 
2045    assert(bld_base->if_stack_size < LP_MAX_TGSI_NESTING);
2046    lp_build_if(&bld_base->if_stack[bld_base->if_stack_size], gallivm, any_active);
2047    bld_base->if_stack_size++;
2048 }
2049 
lp_build_skip_branch_end(struct lp_build_nir_context * bld_base,bool flatten)2050 static void lp_build_skip_branch_end(struct lp_build_nir_context *bld_base, bool flatten)
2051 {
2052    if (flatten)
2053       return;
2054 
2055    assert(bld_base->if_stack_size);
2056    bld_base->if_stack_size--;
2057    lp_build_endif(&bld_base->if_stack[bld_base->if_stack_size]);
2058 }
2059 
if_cond(struct lp_build_nir_context * bld_base,LLVMValueRef cond,bool flatten)2060 static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond, bool flatten)
2061 {
2062    struct gallivm_state *gallivm = bld_base->base.gallivm;
2063 
2064    LLVMBuilderRef builder = gallivm->builder;
2065    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2066    lp_exec_mask_cond_push(&bld->exec_mask, LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, ""));
2067 
2068    lp_build_skip_branch(bld_base, flatten);
2069 }
2070 
else_stmt(struct lp_build_nir_context * bld_base,bool flatten_then,bool flatten_else)2071 static void else_stmt(struct lp_build_nir_context *bld_base, bool flatten_then, bool flatten_else)
2072 {
2073    lp_build_skip_branch_end(bld_base, flatten_then);
2074 
2075    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2076    lp_exec_mask_cond_invert(&bld->exec_mask);
2077 
2078    lp_build_skip_branch(bld_base, flatten_else);
2079 }
2080 
endif_stmt(struct lp_build_nir_context * bld_base,bool flatten)2081 static void endif_stmt(struct lp_build_nir_context *bld_base, bool flatten)
2082 {
2083    lp_build_skip_branch_end(bld_base, flatten);
2084 
2085    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2086    lp_exec_mask_cond_pop(&bld->exec_mask);
2087 }
2088 
break_stmt(struct lp_build_nir_context * bld_base)2089 static void break_stmt(struct lp_build_nir_context *bld_base)
2090 {
2091    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2092 
2093    lp_exec_break(&bld->exec_mask, NULL, false);
2094 }
2095 
continue_stmt(struct lp_build_nir_context * bld_base)2096 static void continue_stmt(struct lp_build_nir_context *bld_base)
2097 {
2098    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2099    lp_exec_continue(&bld->exec_mask);
2100 }
2101 
discard(struct lp_build_nir_context * bld_base,LLVMValueRef cond)2102 static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
2103 {
2104    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2105    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2106    LLVMValueRef mask;
2107 
2108    if (!cond) {
2109       if (bld->exec_mask.has_mask) {
2110          mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2111       } else {
2112          mask = LLVMConstNull(bld->bld_base.base.int_vec_type);
2113       }
2114    } else {
2115       mask = LLVMBuildNot(builder, cond, "");
2116       if (bld->exec_mask.has_mask) {
2117          LLVMValueRef invmask;
2118          invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
2119          mask = LLVMBuildOr(builder, mask, invmask, "");
2120       }
2121    }
2122    lp_build_mask_update(bld->mask, mask);
2123 }
2124 
2125 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2126 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
2127                           LLVMValueRef ptr,
2128                           LLVMValueRef mask)
2129 {
2130    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2131    LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
2132 
2133    current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2134 
2135    LLVMBuildStore(builder, current_vec, ptr);
2136 }
2137 
2138 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2139 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
2140                              LLVMValueRef ptr,
2141                              LLVMValueRef mask)
2142 {
2143    LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2144    LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
2145 
2146    current_vec = lp_build_select(&bld_base->uint_bld,
2147                                  mask,
2148                                  bld_base->uint_bld.zero,
2149                                  current_vec);
2150 
2151    LLVMBuildStore(builder, current_vec, ptr);
2152 }
2153 
2154 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)2155 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
2156                                   LLVMValueRef current_mask_vec,
2157                                   LLVMValueRef total_emitted_vertices_vec)
2158 {
2159    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2160    struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2161    LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2162                                             total_emitted_vertices_vec,
2163                                             bld->max_output_vertices_vec);
2164 
2165    return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2166 }
2167 
emit_vertex(struct lp_build_nir_context * bld_base,uint32_t stream_id)2168 static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2169 {
2170    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2171    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2172 
2173    if (stream_id >= bld->gs_vertex_streams)
2174       return;
2175    assert(bld->gs_iface->emit_vertex);
2176    LLVMValueRef total_emitted_vertices_vec =
2177       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2178    LLVMValueRef mask = mask_vec(bld_base);
2179    mask = clamp_mask_to_max_output_vertices(bld, mask,
2180                                             total_emitted_vertices_vec);
2181    bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
2182                               bld->outputs,
2183                               total_emitted_vertices_vec,
2184                               mask,
2185                               lp_build_const_int_vec(bld->bld_base.base.gallivm, bld->bld_base.base.type, stream_id));
2186 
2187    increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2188                              mask);
2189    increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id],
2190                              mask);
2191 }
2192 
2193 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)2194 end_primitive_masked(struct lp_build_nir_context * bld_base,
2195                      LLVMValueRef mask, uint32_t stream_id)
2196 {
2197    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2198    LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2199 
2200    if (stream_id >= bld->gs_vertex_streams)
2201       return;
2202    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2203    LLVMValueRef emitted_vertices_vec =
2204       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
2205    LLVMValueRef emitted_prims_vec =
2206       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
2207    LLVMValueRef total_emitted_vertices_vec =
2208       LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2209 
2210    LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
2211                                             PIPE_FUNC_NOTEQUAL,
2212                                             emitted_vertices_vec,
2213                                             uint_bld->zero);
2214    mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
2215    bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
2216 				total_emitted_vertices_vec,
2217 				emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
2218    increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
2219                              mask);
2220    clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2221                                 mask);
2222 }
2223 
end_primitive(struct lp_build_nir_context * bld_base,uint32_t stream_id)2224 static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
2225 {
2226    ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2227 
2228    assert(bld->gs_iface->end_primitive);
2229 
2230    LLVMValueRef mask = mask_vec(bld_base);
2231    end_primitive_masked(bld_base, mask, stream_id);
2232 }
2233 
2234 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2235 emit_prologue(struct lp_build_nir_soa_context *bld)
2236 {
2237    struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2238    if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2239       uint32_t num_inputs = bld->num_inputs;
2240       /* If this is an indirect case, the number of inputs should not be 0 */
2241       assert(num_inputs > 0);
2242 
2243       unsigned index, chan;
2244       LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2245       LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2246       bld->inputs_array = lp_build_array_alloca(gallivm,
2247                                                vec_type, array_size,
2248                                                "input_array");
2249 
2250       for (index = 0; index < num_inputs; ++index) {
2251          for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2252             LLVMValueRef lindex =
2253                lp_build_const_int32(gallivm, index * 4 + chan);
2254             LLVMValueRef input_ptr =
2255                LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2256             LLVMValueRef value = bld->inputs[index][chan];
2257             if (value)
2258                LLVMBuildStore(gallivm->builder, value, input_ptr);
2259          }
2260       }
2261    }
2262 }
2263 
emit_vote(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2264 static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2265                       nir_intrinsic_instr *instr, LLVMValueRef result[4])
2266 {
2267    struct gallivm_state * gallivm = bld_base->base.gallivm;
2268    LLVMBuilderRef builder = gallivm->builder;
2269    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2270    LLVMValueRef exec_mask = mask_vec(bld_base);
2271    struct lp_build_loop_state loop_state;
2272    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2273 
2274    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
2275    LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
2276    LLVMValueRef init_val = NULL;
2277    if (instr->intrinsic == nir_intrinsic_vote_ieq ||
2278        instr->intrinsic == nir_intrinsic_vote_feq) {
2279       /* for equal we unfortunately have to loop and find the first valid one. */
2280       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2281       LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2282 
2283       struct lp_build_if_state ifthen;
2284       lp_build_if(&ifthen, gallivm, if_cond);
2285       LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2286                                                        loop_state.counter, "");
2287       LLVMBuildStore(builder, value_ptr, eq_store);
2288       LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
2289       lp_build_endif(&ifthen);
2290       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2291                              NULL, LLVMIntUGE);
2292       init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
2293    } else {
2294       LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
2295    }
2296 
2297    LLVMValueRef res;
2298    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2299    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2300                                                        loop_state.counter, "");
2301    struct lp_build_if_state ifthen;
2302    LLVMValueRef if_cond;
2303    if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2304 
2305    lp_build_if(&ifthen, gallivm, if_cond);
2306    res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");
2307 
2308    if (instr->intrinsic == nir_intrinsic_vote_feq) {
2309       struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
2310       LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
2311                                        LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
2312                                        LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
2313       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2314       res = LLVMBuildAnd(builder, res, tmp, "");
2315    } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
2316       LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
2317       tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
2318       res = LLVMBuildAnd(builder, res, tmp, "");
2319    } else if (instr->intrinsic == nir_intrinsic_vote_any)
2320       res = LLVMBuildOr(builder, res, value_ptr, "");
2321    else
2322       res = LLVMBuildAnd(builder, res, value_ptr, "");
2323    LLVMBuildStore(builder, res, res_store);
2324    lp_build_endif(&ifthen);
2325    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2326                           NULL, LLVMIntUGE);
2327    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2328                                          LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
2329 }
2330 
emit_ballot(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2331 static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
2332 {
2333    struct gallivm_state * gallivm = bld_base->base.gallivm;
2334    LLVMBuilderRef builder = gallivm->builder;
2335    LLVMValueRef exec_mask = mask_vec(bld_base);
2336    struct lp_build_loop_state loop_state;
2337    src = LLVMBuildAnd(builder, src, exec_mask, "");
2338    LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2339    LLVMValueRef res;
2340    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2341    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
2342                                                     loop_state.counter, "");
2343    res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
2344    res = LLVMBuildOr(builder,
2345                      res,
2346                      LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
2347    LLVMBuildStore(builder, res, res_store);
2348 
2349    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2350                           NULL, LLVMIntUGE);
2351    result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
2352                                          LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""));
2353 }
2354 
emit_elect(struct lp_build_nir_context * bld_base,LLVMValueRef result[4])2355 static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
2356 {
2357    struct gallivm_state *gallivm = bld_base->base.gallivm;
2358    LLVMBuilderRef builder = gallivm->builder;
2359    LLVMValueRef exec_mask = mask_vec(bld_base);
2360    struct lp_build_loop_state loop_state;
2361 
2362    LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2363    LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
2364    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2365    LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
2366                                                     loop_state.counter, "");
2367    LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
2368                                      LLVMIntEQ,
2369                                      value_ptr,
2370                                      lp_build_const_int32(gallivm, -1), "");
2371    LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
2372                                       LLVMIntEQ,
2373                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
2374                                       lp_build_const_int32(gallivm, 0), "");
2375 
2376    cond = LLVMBuildAnd(builder, cond, cond2, "");
2377    struct lp_build_if_state ifthen;
2378    lp_build_if(&ifthen, gallivm, cond);
2379    LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
2380    LLVMBuildStore(builder, loop_state.counter, idx_store);
2381    lp_build_endif(&ifthen);
2382    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2383                           NULL, LLVMIntUGE);
2384 
2385    result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
2386                                       lp_build_const_int32(gallivm, -1),
2387                                       LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
2388                                       "");
2389 }
2390 
2391 #if LLVM_VERSION_MAJOR >= 10
emit_shuffle(struct lp_build_nir_context * bld_base,LLVMValueRef src,LLVMValueRef index,nir_intrinsic_instr * instr,LLVMValueRef result[4])2392 static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
2393                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2394 {
2395    assert(instr->intrinsic == nir_intrinsic_shuffle);
2396 
2397    struct gallivm_state *gallivm = bld_base->base.gallivm;
2398    LLVMBuilderRef builder = gallivm->builder;
2399    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2400    uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
2401    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2402 
2403    if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
2404       /* freeze `src` in case inactive invocations contain poison */
2405       src = LLVMBuildFreeze(builder, src, "");
2406       result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
2407    } else {
2408       LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2409       struct lp_build_loop_state loop_state;
2410       lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2411 
2412       LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");
2413 
2414       LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
2415       /* freeze `src_value` in case an out-of-bounds index or an index into an
2416        * inactive invocation results in poison
2417        */
2418       src_value = LLVMBuildFreeze(builder, src_value, "");
2419 
2420       LLVMValueRef res = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2421       res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
2422       LLVMBuildStore(builder, res, res_store);
2423 
2424       lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2425                              NULL, LLVMIntUGE);
2426 
2427       result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2428    }
2429 }
2430 #endif
2431 
emit_reduce(struct lp_build_nir_context * bld_base,LLVMValueRef src,nir_intrinsic_instr * instr,LLVMValueRef result[4])2432 static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
2433                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
2434 {
2435    struct gallivm_state *gallivm = bld_base->base.gallivm;
2436    LLVMBuilderRef builder = gallivm->builder;
2437    uint32_t bit_size = nir_src_bit_size(instr->src[0]);
2438    /* can't use llvm reduction intrinsics because of exec_mask */
2439    LLVMValueRef exec_mask = mask_vec(bld_base);
2440    struct lp_build_loop_state loop_state;
2441    nir_op reduction_op = nir_intrinsic_reduction_op(instr);
2442 
2443    LLVMValueRef res_store = NULL;
2444    LLVMValueRef scan_store;
2445    struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);
2446 
2447    if (instr->intrinsic != nir_intrinsic_reduce)
2448       res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
2449 
2450    scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");
2451 
2452    struct lp_build_context elem_bld;
2453    bool is_flt = reduction_op == nir_op_fadd ||
2454       reduction_op == nir_op_fmul ||
2455       reduction_op == nir_op_fmin ||
2456       reduction_op == nir_op_fmax;
2457    bool is_unsigned = reduction_op == nir_op_umin ||
2458       reduction_op == nir_op_umax;
2459 
2460    struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
2461       get_int_bld(bld_base, is_unsigned, bit_size);
2462 
2463    lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));
2464 
2465    LLVMValueRef store_val = NULL;
2466    /*
2467     * Put the identity value for the operation into the storage
2468     */
2469    switch (reduction_op) {
2470    case nir_op_fmin: {
2471       LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
2472          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
2473       store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
2474       break;
2475    }
2476    case nir_op_fmax: {
2477       LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
2478          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
2479       store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
2480       break;
2481    }
2482    case nir_op_fmul: {
2483       LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
2484          (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
2485       store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
2486       break;
2487    }
2488    case nir_op_umin:
2489       switch (bit_size) {
2490       case 8:
2491          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
2492          break;
2493       case 16:
2494          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
2495          break;
2496       case 32:
2497       default:
2498          store_val  = lp_build_const_int32(gallivm, UINT_MAX);
2499          break;
2500       case 64:
2501          store_val  = lp_build_const_int64(gallivm, UINT64_MAX);
2502          break;
2503       }
2504       break;
2505    case nir_op_imin:
2506       switch (bit_size) {
2507       case 8:
2508          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
2509          break;
2510       case 16:
2511          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
2512          break;
2513       case 32:
2514       default:
2515          store_val  = lp_build_const_int32(gallivm, INT_MAX);
2516          break;
2517       case 64:
2518          store_val  = lp_build_const_int64(gallivm, INT64_MAX);
2519          break;
2520       }
2521       break;
2522    case nir_op_imax:
2523       switch (bit_size) {
2524       case 8:
2525          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
2526          break;
2527       case 16:
2528          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
2529          break;
2530       case 32:
2531       default:
2532          store_val  = lp_build_const_int32(gallivm, INT_MIN);
2533          break;
2534       case 64:
2535          store_val  = lp_build_const_int64(gallivm, INT64_MIN);
2536          break;
2537       }
2538       break;
2539    case nir_op_imul:
2540       switch (bit_size) {
2541       case 8:
2542          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
2543          break;
2544       case 16:
2545          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
2546          break;
2547       case 32:
2548       default:
2549          store_val  = lp_build_const_int32(gallivm, 1);
2550          break;
2551       case 64:
2552          store_val  = lp_build_const_int64(gallivm, 1);
2553          break;
2554       }
2555       break;
2556    case nir_op_iand:
2557       switch (bit_size) {
2558       case 8:
2559          store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
2560          break;
2561       case 16:
2562          store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
2563          break;
2564       case 32:
2565       default:
2566          store_val  = lp_build_const_int32(gallivm, 0xffffffff);
2567          break;
2568       case 64:
2569          store_val  = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
2570          break;
2571       }
2572       break;
2573    default:
2574       break;
2575    }
2576    if (store_val)
2577       LLVMBuildStore(builder, store_val, scan_store);
2578 
2579    LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");
2580 
2581    lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
2582 
2583    struct lp_build_if_state ifthen;
2584    LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
2585    lp_build_if(&ifthen, gallivm, if_cond);
2586    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");
2587 
2588    LLVMValueRef res = NULL;
2589    LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
2590    if (instr->intrinsic != nir_intrinsic_reduce)
2591       res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");
2592 
2593    if (instr->intrinsic == nir_intrinsic_exclusive_scan)
2594       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2595 
2596    if (is_flt) {
2597       scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
2598       value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
2599    }
2600    switch (reduction_op) {
2601    case nir_op_fadd:
2602    case nir_op_iadd:
2603       scan_val = lp_build_add(&elem_bld, value, scan_val);
2604       break;
2605    case nir_op_fmul:
2606    case nir_op_imul:
2607       scan_val = lp_build_mul(&elem_bld, value, scan_val);
2608       break;
2609    case nir_op_imin:
2610    case nir_op_umin:
2611    case nir_op_fmin:
2612       scan_val = lp_build_min(&elem_bld, value, scan_val);
2613       break;
2614    case nir_op_imax:
2615    case nir_op_umax:
2616    case nir_op_fmax:
2617       scan_val = lp_build_max(&elem_bld, value, scan_val);
2618       break;
2619    case nir_op_iand:
2620       scan_val = lp_build_and(&elem_bld, value, scan_val);
2621       break;
2622    case nir_op_ior:
2623       scan_val = lp_build_or(&elem_bld, value, scan_val);
2624       break;
2625    case nir_op_ixor:
2626       scan_val = lp_build_xor(&elem_bld, value, scan_val);
2627       break;
2628    default:
2629       assert(0);
2630       break;
2631    }
2632    if (is_flt)
2633       scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
2634    LLVMBuildStore(builder, scan_val, scan_store);
2635 
2636    if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
2637       res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
2638    }
2639 
2640    if (instr->intrinsic != nir_intrinsic_reduce)
2641       LLVMBuildStore(builder, res, res_store);
2642    lp_build_endif(&ifthen);
2643 
2644    lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
2645                           NULL, LLVMIntUGE);
2646    if (instr->intrinsic == nir_intrinsic_reduce)
2647       result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad2(builder, int_bld->elem_type, scan_store, ""));
2648    else
2649       result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
2650 }
2651 
emit_read_invocation(struct lp_build_nir_context * bld_base,LLVMValueRef src,unsigned bit_size,LLVMValueRef invoc,LLVMValueRef result[4])2652 static void emit_read_invocation(struct lp_build_nir_context *bld_base,
2653                                  LLVMValueRef src,
2654                                  unsigned bit_size,
2655                                  LLVMValueRef invoc,
2656                                  LLVMValueRef result[4])
2657 {
2658    struct gallivm_state *gallivm = bld_base->base.gallivm;
2659    LLVMValueRef idx = first_active_invocation(bld_base);
2660    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, bit_size);
2661 
2662    /* If we're emitting readInvocation() (as opposed to readFirstInvocation),
2663     * use the first active channel to pull the invocation index number out of
2664     * the invocation arg.
2665     */
2666    if (invoc)
2667       idx = LLVMBuildExtractElement(gallivm->builder, invoc, idx, "");
2668 
2669    LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder,
2670                                                 src, idx, "");
2671    result[0] = lp_build_broadcast_scalar(uint_bld, value);
2672 }
2673 
2674 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2675 emit_interp_at(struct lp_build_nir_context *bld_base,
2676                unsigned num_components,
2677                nir_variable *var,
2678                bool centroid,
2679                bool sample,
2680                unsigned const_index,
2681                LLVMValueRef indir_index,
2682                LLVMValueRef offsets[2],
2683                LLVMValueRef dst[4])
2684 {
2685    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2686 
2687    for (unsigned i = 0; i < num_components; i++) {
2688       dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2689                                         const_index + var->data.driver_location, i + var->data.location_frac,
2690                                         centroid, sample, indir_index, offsets);
2691    }
2692 }
2693 
2694 static void
emit_set_vertex_and_primitive_count(struct lp_build_nir_context * bld_base,LLVMValueRef vert_count,LLVMValueRef prim_count)2695 emit_set_vertex_and_primitive_count(struct lp_build_nir_context *bld_base,
2696                                     LLVMValueRef vert_count,
2697                                     LLVMValueRef prim_count)
2698 {
2699    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2700    struct gallivm_state *gallivm = bld_base->base.gallivm;
2701    assert(bld->mesh_iface);
2702    LLVMValueRef idx = first_active_invocation(bld_base);
2703 
2704    LLVMValueRef vcount = LLVMBuildExtractElement(gallivm->builder,
2705                                                  vert_count, idx, "");
2706    LLVMValueRef pcount = LLVMBuildExtractElement(gallivm->builder,
2707                                                  prim_count, idx, "");
2708 
2709    bld->mesh_iface->emit_vertex_and_primitive_count(bld->mesh_iface, &bld_base->base, vcount, pcount);
2710 }
2711 
2712 static void
emit_launch_mesh_workgroups(struct lp_build_nir_context * bld_base,LLVMValueRef launch_grid)2713 emit_launch_mesh_workgroups(struct lp_build_nir_context *bld_base,
2714                             LLVMValueRef launch_grid)
2715 {
2716    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2717    struct gallivm_state *gallivm = bld_base->base.gallivm;
2718    LLVMTypeRef vec_type = LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), 3);
2719 
2720    LLVMValueRef local_invoc_idx = get_local_invocation_index(bld);
2721 
2722    vec_type = LLVMPointerType(vec_type, 0);
2723 
2724    local_invoc_idx = LLVMBuildExtractElement(gallivm->builder, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2725    LLVMValueRef if_cond = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2726    struct lp_build_if_state ifthen;
2727    lp_build_if(&ifthen, gallivm, if_cond);
2728    LLVMValueRef ptr = bld->payload_ptr;
2729    ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
2730    for (unsigned i = 0; i < 3; i++) {
2731       LLVMValueRef lg = LLVMBuildExtractValue(gallivm->builder, launch_grid, i, "");
2732       lg = LLVMBuildExtractElement(gallivm->builder, lg, lp_build_const_int32(gallivm, 0), "");
2733       LLVMValueRef this_ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
2734       LLVMBuildStore(gallivm->builder, lg, this_ptr);
2735       ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 4), "");
2736    }
2737    lp_build_endif(&ifthen);
2738 }
2739 
2740 static void
emit_call(struct lp_build_nir_context * bld_base,struct lp_build_fn * fn,int num_args,LLVMValueRef * args)2741 emit_call(struct lp_build_nir_context *bld_base,
2742           struct lp_build_fn *fn,
2743           int num_args,
2744           LLVMValueRef *args)
2745 {
2746    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2747 
2748    args[0] = mask_vec(bld_base);
2749    args[1] = bld->call_context_ptr;
2750    LLVMBuildCall2(bld_base->base.gallivm->builder, fn->fn_type, fn->fn, args, num_args, "");
2751 }
2752 
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2753 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2754                                                struct lp_type type,
2755                                                unsigned scratch_size)
2756 {
2757    LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2758    LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2759    unsigned i;
2760 
2761    if (type.length == 1)
2762       return LLVMConstInt(elem_type, 0, 0);
2763 
2764    for (i = 0; i < type.length; ++i)
2765       elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2766 
2767    return LLVMConstVector(elems, type.length);
2768 }
2769 
2770 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2771 emit_load_scratch(struct lp_build_nir_context *bld_base,
2772                   unsigned nc, unsigned bit_size,
2773                   LLVMValueRef offset,
2774                   LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2775 {
2776    struct gallivm_state * gallivm = bld_base->base.gallivm;
2777    LLVMBuilderRef builder = gallivm->builder;
2778    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2779    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2780    struct lp_build_context *load_bld;
2781    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2782    LLVMValueRef exec_mask = mask_vec(bld_base);
2783    LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2784                                                      LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2785                                                      bld->scratch_ptr);
2786    load_bld = get_int_bld(bld_base, true, bit_size);
2787 
2788    offset = lp_build_add(uint_bld, offset, thread_offsets);
2789 
2790    for (unsigned c = 0; c < nc; c++) {
2791       LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2792 
2793       outval[c] = lp_build_masked_gather(gallivm, load_bld->type.length, bit_size,
2794                                          load_bld->vec_type,
2795                                          lp_vec_add_offset_ptr(bld_base, bit_size,
2796                                                                scratch_ptr_vec,
2797                                                                chan_offset),
2798                                          exec_mask);
2799       outval[c] = LLVMBuildBitCast(builder, outval[c], load_bld->vec_type, "");
2800    }
2801 }
2802 
2803 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2804 emit_store_scratch(struct lp_build_nir_context *bld_base,
2805                    unsigned writemask, unsigned nc,
2806                    unsigned bit_size, LLVMValueRef offset,
2807                    LLVMValueRef dst)
2808 {
2809    struct gallivm_state * gallivm = bld_base->base.gallivm;
2810    LLVMBuilderRef builder = gallivm->builder;
2811    struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2812    struct lp_build_context *uint_bld = &bld_base->uint_bld;
2813    struct lp_build_context *store_bld;
2814    LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2815    LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2816                                                      LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2817                                                      bld->scratch_ptr);
2818    store_bld = get_int_bld(bld_base, true, bit_size);
2819 
2820    LLVMValueRef exec_mask = mask_vec(bld_base);
2821    offset = lp_build_add(uint_bld, offset, thread_offsets);
2822 
2823    for (unsigned c = 0; c < nc; c++) {
2824       if (!(writemask & (1u << c)))
2825          continue;
2826       LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2827 
2828       LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2829 
2830       val = LLVMBuildBitCast(builder, val, store_bld->vec_type, "");
2831 
2832       lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size,
2833                               lp_vec_add_offset_ptr(bld_base, bit_size,
2834                                                     scratch_ptr_vec, chan_offset),
2835                               val, exec_mask);
2836    }
2837 }
2838 
2839 static void
emit_clock(struct lp_build_nir_context * bld_base,LLVMValueRef dst[4])2840 emit_clock(struct lp_build_nir_context *bld_base,
2841            LLVMValueRef dst[4])
2842 {
2843    struct gallivm_state *gallivm = bld_base->base.gallivm;
2844    LLVMBuilderRef builder = gallivm->builder;
2845    struct lp_build_context *uint_bld = get_int_bld(bld_base, true, 32);
2846 
2847    lp_init_clock_hook(gallivm);
2848 
2849    LLVMTypeRef get_time_type = LLVMFunctionType(LLVMInt64TypeInContext(gallivm->context), NULL, 0, 1);
2850    LLVMValueRef result = LLVMBuildCall2(builder, get_time_type, gallivm->get_time_hook, NULL, 0, "");
2851 
2852    LLVMValueRef hi = LLVMBuildShl(builder, result, lp_build_const_int64(gallivm, 32), "");
2853    hi = LLVMBuildTrunc(builder, hi, uint_bld->elem_type, "");
2854    LLVMValueRef lo = LLVMBuildTrunc(builder, result, uint_bld->elem_type, "");
2855    dst[0] = lp_build_broadcast_scalar(uint_bld, lo);
2856    dst[1] = lp_build_broadcast_scalar(uint_bld, hi);
2857 }
2858 
2859 LLVMTypeRef
lp_build_cs_func_call_context(struct gallivm_state * gallivm,int length,LLVMTypeRef context_type,LLVMTypeRef resources_type)2860 lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length,
2861                               LLVMTypeRef context_type, LLVMTypeRef resources_type)
2862 {
2863    LLVMTypeRef args[LP_NIR_CALL_CONTEXT_MAX_ARGS];
2864 
2865    args[LP_NIR_CALL_CONTEXT_CONTEXT] = LLVMPointerType(context_type, 0);
2866    args[LP_NIR_CALL_CONTEXT_RESOURCES] = LLVMPointerType(resources_type, 0);
2867    args[LP_NIR_CALL_CONTEXT_SHARED] = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); /* shared_ptr */
2868    args[LP_NIR_CALL_CONTEXT_SCRATCH] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* scratch ptr */
2869    args[LP_NIR_CALL_CONTEXT_WORK_DIM] = LLVMInt32TypeInContext(gallivm->context); /* work_dim */
2870    args[LP_NIR_CALL_CONTEXT_THREAD_ID_0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[0] */
2871    args[LP_NIR_CALL_CONTEXT_THREAD_ID_1] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[1] */
2872    args[LP_NIR_CALL_CONTEXT_THREAD_ID_2] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[2] */
2873    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[0] */
2874    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[1] */
2875    args[LP_NIR_CALL_CONTEXT_BLOCK_ID_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[2] */
2876 
2877    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[0] */
2878    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[1] */
2879    args[LP_NIR_CALL_CONTEXT_GRID_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[2] */
2880    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[0] */
2881    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[1] */
2882    args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[2] */
2883 
2884    LLVMTypeRef stype = LLVMStructTypeInContext(gallivm->context, args, LP_NIR_CALL_CONTEXT_MAX_ARGS, 0);
2885    return stype;
2886 }
2887 
2888 static void
build_call_context(struct lp_build_nir_soa_context * bld)2889 build_call_context(struct lp_build_nir_soa_context *bld)
2890 {
2891    struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2892    bld->call_context_ptr = lp_build_alloca(gallivm, bld->call_context_type, "callcontext");
2893    LLVMValueRef call_context = LLVMGetUndef(bld->call_context_type);
2894    call_context = LLVMBuildInsertValue(gallivm->builder,
2895                                        call_context, bld->context_ptr, LP_NIR_CALL_CONTEXT_CONTEXT, "");
2896    call_context = LLVMBuildInsertValue(gallivm->builder,
2897                                        call_context, bld->resources_ptr, LP_NIR_CALL_CONTEXT_RESOURCES, "");
2898    if (bld->shared_ptr) {
2899       call_context = LLVMBuildInsertValue(gallivm->builder,
2900                                           call_context, bld->shared_ptr, LP_NIR_CALL_CONTEXT_SHARED, "");
2901    } else {
2902       call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2903                                           LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2904                                           LP_NIR_CALL_CONTEXT_SHARED, "");
2905    }
2906    if (bld->scratch_ptr) {
2907       call_context = LLVMBuildInsertValue(gallivm->builder,
2908                                           call_context, bld->scratch_ptr, LP_NIR_CALL_CONTEXT_SCRATCH, "");
2909    } else {
2910       call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2911                                           LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2912                                           LP_NIR_CALL_CONTEXT_SCRATCH, "");
2913    }
2914    call_context = LLVMBuildInsertValue(gallivm->builder,
2915                                        call_context, bld->system_values.work_dim, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
2916    call_context = LLVMBuildInsertValue(gallivm->builder,
2917                                        call_context, bld->system_values.thread_id[0], LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
2918    call_context = LLVMBuildInsertValue(gallivm->builder,
2919                                        call_context, bld->system_values.thread_id[1], LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
2920    call_context = LLVMBuildInsertValue(gallivm->builder,
2921                                        call_context, bld->system_values.thread_id[2], LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
2922    call_context = LLVMBuildInsertValue(gallivm->builder,
2923                                        call_context, bld->system_values.block_id[0], LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
2924    call_context = LLVMBuildInsertValue(gallivm->builder,
2925                                        call_context, bld->system_values.block_id[1], LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
2926    call_context = LLVMBuildInsertValue(gallivm->builder,
2927                                        call_context, bld->system_values.block_id[2], LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
2928    call_context = LLVMBuildInsertValue(gallivm->builder,
2929                                        call_context, bld->system_values.grid_size[0], LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
2930    call_context = LLVMBuildInsertValue(gallivm->builder,
2931                                        call_context, bld->system_values.grid_size[1], LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
2932    call_context = LLVMBuildInsertValue(gallivm->builder,
2933                                        call_context, bld->system_values.grid_size[2], LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
2934    call_context = LLVMBuildInsertValue(gallivm->builder,
2935                                        call_context, bld->system_values.block_size[0], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
2936    call_context = LLVMBuildInsertValue(gallivm->builder,
2937                                        call_context, bld->system_values.block_size[1], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
2938    call_context = LLVMBuildInsertValue(gallivm->builder,
2939                                        call_context, bld->system_values.block_size[2], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");
2940    LLVMBuildStore(gallivm->builder, call_context, bld->call_context_ptr);
2941 }
2942 
lp_build_nir_soa_func(struct gallivm_state * gallivm,struct nir_shader * shader,nir_function_impl * impl,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])2943 void lp_build_nir_soa_func(struct gallivm_state *gallivm,
2944                            struct nir_shader *shader,
2945                            nir_function_impl *impl,
2946                            const struct lp_build_tgsi_params *params,
2947                            LLVMValueRef (*outputs)[4])
2948 {
2949    struct lp_build_nir_soa_context bld;
2950    const struct lp_type type = params->type;
2951    struct lp_type res_type;
2952 
2953    assert(type.length <= LP_MAX_VECTOR_LENGTH);
2954    memset(&res_type, 0, sizeof res_type);
2955    res_type.width = type.width;
2956    res_type.length = type.length;
2957    res_type.sign = 1;
2958 
2959    /* Setup build context */
2960    memset(&bld, 0, sizeof bld);
2961    lp_build_context_init(&bld.bld_base.base, gallivm, type);
2962    lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2963    lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2964    lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2965    lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2966    {
2967       struct lp_type dbl_type;
2968       dbl_type = type;
2969       dbl_type.width *= 2;
2970       lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2971    }
2972    {
2973       struct lp_type half_type;
2974       half_type = type;
2975       half_type.width /= 2;
2976       lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2977    }
2978    {
2979       struct lp_type uint64_type;
2980       uint64_type = lp_uint_type(type);
2981       uint64_type.width *= 2;
2982       lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2983    }
2984    {
2985       struct lp_type int64_type;
2986       int64_type = lp_int_type(type);
2987       int64_type.width *= 2;
2988       lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2989    }
2990    {
2991       struct lp_type uint16_type;
2992       uint16_type = lp_uint_type(type);
2993       uint16_type.width /= 2;
2994       lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2995    }
2996    {
2997       struct lp_type int16_type;
2998       int16_type = lp_int_type(type);
2999       int16_type.width /= 2;
3000       lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
3001    }
3002    {
3003       struct lp_type uint8_type;
3004       uint8_type = lp_uint_type(type);
3005       uint8_type.width /= 4;
3006       lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
3007    }
3008    {
3009       struct lp_type int8_type;
3010       int8_type = lp_int_type(type);
3011       int8_type.width /= 4;
3012       lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
3013    }
3014    bld.bld_base.load_var = emit_load_var;
3015    bld.bld_base.store_var = emit_store_var;
3016    bld.bld_base.load_reg = emit_load_reg;
3017    bld.bld_base.store_reg = emit_store_reg;
3018    bld.bld_base.emit_var_decl = emit_var_decl;
3019    bld.bld_base.load_ubo = emit_load_ubo;
3020    bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
3021    bld.bld_base.load_global = emit_load_global;
3022    bld.bld_base.store_global = emit_store_global;
3023    bld.bld_base.atomic_global = emit_atomic_global;
3024    bld.bld_base.tex = emit_tex;
3025    bld.bld_base.tex_size = emit_tex_size;
3026    bld.bld_base.bgnloop = bgnloop;
3027    bld.bld_base.endloop = endloop;
3028    bld.bld_base.if_cond = if_cond;
3029    bld.bld_base.else_stmt = else_stmt;
3030    bld.bld_base.endif_stmt = endif_stmt;
3031    bld.bld_base.break_stmt = break_stmt;
3032    bld.bld_base.continue_stmt = continue_stmt;
3033    bld.bld_base.sysval_intrin = emit_sysval_intrin;
3034    bld.bld_base.discard = discard;
3035    bld.bld_base.emit_vertex = emit_vertex;
3036    bld.bld_base.end_primitive = end_primitive;
3037    bld.bld_base.load_mem = emit_load_mem;
3038    bld.bld_base.store_mem = emit_store_mem;
3039    bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
3040    bld.bld_base.atomic_mem = emit_atomic_mem;
3041    bld.bld_base.barrier = emit_barrier;
3042    bld.bld_base.image_op = emit_image_op;
3043    bld.bld_base.image_size = emit_image_size;
3044    bld.bld_base.vote = emit_vote;
3045    bld.bld_base.elect = emit_elect;
3046    bld.bld_base.reduce = emit_reduce;
3047    bld.bld_base.ballot = emit_ballot;
3048 #if LLVM_VERSION_MAJOR >= 10
3049    bld.bld_base.shuffle = emit_shuffle;
3050 #endif
3051    bld.bld_base.read_invocation = emit_read_invocation;
3052    bld.bld_base.helper_invocation = emit_helper_invocation;
3053    bld.bld_base.interp_at = emit_interp_at;
3054    bld.bld_base.call = emit_call;
3055    bld.bld_base.load_scratch = emit_load_scratch;
3056    bld.bld_base.store_scratch = emit_store_scratch;
3057    bld.bld_base.load_const = emit_load_const;
3058    bld.bld_base.clock = emit_clock;
3059    bld.bld_base.set_vertex_and_primitive_count = emit_set_vertex_and_primitive_count;
3060    bld.bld_base.launch_mesh_workgroups = emit_launch_mesh_workgroups;
3061 
3062    bld.bld_base.fns = params->fns;
3063    bld.bld_base.func = params->current_func;
3064    bld.mask = params->mask;
3065    bld.inputs = params->inputs;
3066    bld.outputs = outputs;
3067    bld.consts_ptr = params->consts_ptr;
3068    bld.ssbo_ptr = params->ssbo_ptr;
3069    bld.sampler = params->sampler;
3070 
3071    bld.context_type = params->context_type;
3072    bld.context_ptr = params->context_ptr;
3073    bld.resources_type = params->resources_type;
3074    bld.resources_ptr = params->resources_ptr;
3075    bld.thread_data_type = params->thread_data_type;
3076    bld.thread_data_ptr = params->thread_data_ptr;
3077    bld.bld_base.aniso_filter_table = params->aniso_filter_table;
3078    bld.image = params->image;
3079    bld.shared_ptr = params->shared_ptr;
3080    bld.payload_ptr = params->payload_ptr;
3081    bld.coro = params->coro;
3082    bld.kernel_args_ptr = params->kernel_args;
3083    bld.num_inputs = params->num_inputs;
3084    bld.indirects = 0;
3085    if (shader->info.inputs_read_indirectly)
3086       bld.indirects |= nir_var_shader_in;
3087 
3088    bld.gs_iface = params->gs_iface;
3089    bld.tcs_iface = params->tcs_iface;
3090    bld.tes_iface = params->tes_iface;
3091    bld.fs_iface = params->fs_iface;
3092    bld.mesh_iface = params->mesh_iface;
3093    if (bld.gs_iface) {
3094       struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
3095 
3096       bld.gs_vertex_streams = params->gs_vertex_streams;
3097       bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3098                                                            shader->info.gs.vertices_out);
3099       for (int i = 0; i < params->gs_vertex_streams; i++) {
3100          bld.emitted_prims_vec_ptr[i] =
3101             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
3102          bld.emitted_vertices_vec_ptr[i] =
3103             lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
3104          bld.total_emitted_vertices_vec_ptr[i] =
3105             lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
3106       }
3107    }
3108    lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3109 
3110    if (params->system_values)
3111       bld.system_values = *params->system_values;
3112 
3113    bld.bld_base.shader = shader;
3114 
3115    bld.scratch_size = ALIGN(shader->scratch_size, 8);
3116    if (params->scratch_ptr)
3117       bld.scratch_ptr = params->scratch_ptr;
3118    else if (shader->scratch_size) {
3119       bld.scratch_ptr = lp_build_array_alloca(gallivm,
3120                                               LLVMInt8TypeInContext(gallivm->context),
3121                                               lp_build_const_int32(gallivm, bld.scratch_size * type.length),
3122                                               "scratch");
3123    }
3124 
3125    if (!exec_list_is_singular(&shader->functions)) {
3126       bld.call_context_type = lp_build_cs_func_call_context(gallivm, type.length, bld.context_type, bld.resources_type);
3127       if (!params->call_context_ptr) {
3128          build_call_context(&bld);
3129       } else
3130          bld.call_context_ptr = params->call_context_ptr;
3131    }
3132 
3133    emit_prologue(&bld);
3134    lp_build_nir_llvm(&bld.bld_base, shader, impl);
3135 
3136    if (bld.gs_iface) {
3137       LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
3138       LLVMValueRef total_emitted_vertices_vec;
3139       LLVMValueRef emitted_prims_vec;
3140 
3141       for (int i = 0; i < params->gs_vertex_streams; i++) {
3142          end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
3143 
3144          total_emitted_vertices_vec =
3145             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");
3146 
3147          emitted_prims_vec =
3148             LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
3149          bld.gs_iface->gs_epilogue(bld.gs_iface,
3150                                    total_emitted_vertices_vec,
3151                                    emitted_prims_vec, i);
3152       }
3153    }
3154    lp_exec_mask_fini(&bld.exec_mask);
3155 }
3156 
lp_build_nir_soa(struct gallivm_state * gallivm,struct nir_shader * shader,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])3157 void lp_build_nir_soa(struct gallivm_state *gallivm,
3158                       struct nir_shader *shader,
3159                       const struct lp_build_tgsi_params *params,
3160                       LLVMValueRef (*outputs)[4])
3161 {
3162    lp_build_nir_prepasses(shader);
3163    lp_build_nir_soa_func(gallivm, shader,
3164                          nir_shader_get_entrypoint(shader),
3165                          params, outputs);
3166 }
3167