1 /**************************************************************************
2 *
3 * Copyright 2019 Red Hat.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice shall be included
14 * in all copies or substantial portions of the Software.
15 *
16 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 *
24 **************************************************************************/
25
26 #include "lp_bld_nir.h"
27 #include "lp_bld_init.h"
28 #include "lp_bld_flow.h"
29 #include "lp_bld_logic.h"
30 #include "lp_bld_gather.h"
31 #include "lp_bld_const.h"
32 #include "lp_bld_struct.h"
33 #include "lp_bld_jit_types.h"
34 #include "lp_bld_arit.h"
35 #include "lp_bld_bitarit.h"
36 #include "lp_bld_coro.h"
37 #include "lp_bld_printf.h"
38 #include "lp_bld_intr.h"
39 #include "util/u_cpu_detect.h"
40 #include "util/u_math.h"
41
/* Map a scalar bit width to log2 of its byte size, for shift-based
 * address arithmetic.  Unrecognized widths are treated as 32-bit. */
static int bit_size_to_shift_size(int bit_size)
{
   if (bit_size == 64)
      return 3;
   if (bit_size == 16)
      return 1;
   if (bit_size == 8)
      return 0;
   return 2; /* 32-bit and any unknown width */
}
56
57 /*
58 * combine the execution mask if there is one with the current mask.
59 */
60 static LLVMValueRef
mask_vec(struct lp_build_nir_context * bld_base)61 mask_vec(struct lp_build_nir_context *bld_base)
62 {
63 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
64 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
65 struct lp_exec_mask *exec_mask = &bld->exec_mask;
66 LLVMValueRef bld_mask = bld->mask ? lp_build_mask_value(bld->mask) : NULL;
67 if (!exec_mask->has_mask) {
68 return bld_mask;
69 }
70 if (!bld_mask)
71 return exec_mask->exec_mask;
72 return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask),
73 exec_mask->exec_mask, "");
74 }
75
76 static bool
invocation_0_must_be_active(struct lp_build_nir_context * bld_base)77 invocation_0_must_be_active(struct lp_build_nir_context *bld_base)
78 {
79 struct lp_build_nir_soa_context * bld = (struct lp_build_nir_soa_context *)bld_base;
80
81 /* Fragment shaders may dispatch with invocation 0 inactive. All other
82 * stages have invocation 0 active at the top. (See
83 * lp_build_tgsi_params.mask setup in draw_llvm.c and lp_state_*.c)
84 */
85 if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT)
86 return false;
87
88 /* If we're in some control flow right now, then invocation 0 may be
89 * disabled.
90 */
91 if (bld->exec_mask.has_mask)
92 return false;
93
94 return true;
95 }
96
97 /**
98 * Returns a scalar value of the first active invocation in the exec_mask.
99 *
100 * Note that gallivm doesn't generally jump when exec_mask is 0 (such as if/else
101 * branches thare are all false, or portions of a loop after a break/continue
102 * has ended the last invocation that had been active in the loop). In that
103 * case, we return a 0 value so that unconditional LLVMBuildExtractElement of
104 * the first_active_invocation (such as in memory loads, texture unit index
105 * lookups, etc) will use a valid index
106 */
first_active_invocation(struct lp_build_nir_context * bld_base)107 static LLVMValueRef first_active_invocation(struct lp_build_nir_context *bld_base)
108 {
109 struct gallivm_state *gallivm = bld_base->base.gallivm;
110 LLVMBuilderRef builder = gallivm->builder;
111 struct lp_build_context *uint_bld = &bld_base->uint_bld;
112
113 if (invocation_0_must_be_active(bld_base))
114 return lp_build_const_int32(gallivm, 0);
115
116 LLVMValueRef exec_mask = mask_vec(bld_base);
117
118 LLVMValueRef bitmask = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "exec_bitvec");
119 /* Turn it from N x i1 to iN, then extend it up to i32 so we can use a single
120 * cttz intrinsic -- I assume the compiler will drop the extend if there are
121 * smaller instructions available, since we have is_zero_poison.
122 */
123 bitmask = LLVMBuildBitCast(builder, bitmask, LLVMIntTypeInContext(gallivm->context, uint_bld->type.length), "exec_bitmask");
124 bitmask = LLVMBuildZExt(builder, bitmask, bld_base->int_bld.elem_type, "");
125
126 LLVMValueRef any_active = LLVMBuildICmp(builder, LLVMIntNE, bitmask, lp_build_const_int32(gallivm, 0), "any_active");
127
128 LLVMValueRef first_active = lp_build_intrinsic_binary(builder, "llvm.cttz.i32", bld_base->int_bld.elem_type, bitmask,
129 LLVMConstInt(LLVMInt1TypeInContext(gallivm->context), false, false));
130
131 return LLVMBuildSelect(builder, any_active, first_active, lp_build_const_int32(gallivm, 0), "first_active_or_0");
132 }
133
134 static LLVMValueRef
lp_build_zero_bits(struct gallivm_state * gallivm,int bit_size,bool is_float)135 lp_build_zero_bits(struct gallivm_state *gallivm, int bit_size, bool is_float)
136 {
137 if (bit_size == 64)
138 return LLVMConstInt(LLVMInt64TypeInContext(gallivm->context), 0, 0);
139 else if (bit_size == 16)
140 return LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0, 0);
141 else if (bit_size == 8)
142 return LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0, 0);
143 else
144 return is_float ? lp_build_const_float(gallivm, 0) : lp_build_const_int32(gallivm, 0);
145 }
146
147 static LLVMValueRef
emit_fetch_64bit(struct lp_build_nir_context * bld_base,LLVMValueRef input,LLVMValueRef input2)148 emit_fetch_64bit(
149 struct lp_build_nir_context * bld_base,
150 LLVMValueRef input,
151 LLVMValueRef input2)
152 {
153 struct gallivm_state *gallivm = bld_base->base.gallivm;
154 LLVMBuilderRef builder = gallivm->builder;
155 LLVMValueRef res;
156 int i;
157 LLVMValueRef shuffles[2 * (LP_MAX_VECTOR_WIDTH/32)];
158 int len = bld_base->base.type.length * 2;
159 assert(len <= (2 * (LP_MAX_VECTOR_WIDTH/32)));
160
161 for (i = 0; i < bld_base->base.type.length * 2; i+=2) {
162 #if UTIL_ARCH_LITTLE_ENDIAN
163 shuffles[i] = lp_build_const_int32(gallivm, i / 2);
164 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
165 #else
166 shuffles[i] = lp_build_const_int32(gallivm, i / 2 + bld_base->base.type.length);
167 shuffles[i + 1] = lp_build_const_int32(gallivm, i / 2);
168 #endif
169 }
170 res = LLVMBuildShuffleVector(builder, input, input2, LLVMConstVector(shuffles, len), "");
171
172 return LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
173 }
174
175 static void
emit_store_64bit_split(struct lp_build_nir_context * bld_base,LLVMValueRef value,LLVMValueRef split_values[2])176 emit_store_64bit_split(struct lp_build_nir_context *bld_base,
177 LLVMValueRef value,
178 LLVMValueRef split_values[2])
179 {
180 struct gallivm_state *gallivm = bld_base->base.gallivm;
181 LLVMBuilderRef builder = gallivm->builder;
182 unsigned i;
183 LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH/32];
184 LLVMValueRef shuffles2[LP_MAX_VECTOR_WIDTH/32];
185 int len = bld_base->base.type.length * 2;
186
187 value = LLVMBuildBitCast(gallivm->builder, value, LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), len), "");
188 for (i = 0; i < bld_base->base.type.length; i++) {
189 #if UTIL_ARCH_LITTLE_ENDIAN
190 shuffles[i] = lp_build_const_int32(gallivm, i * 2);
191 shuffles2[i] = lp_build_const_int32(gallivm, (i * 2) + 1);
192 #else
193 shuffles[i] = lp_build_const_int32(gallivm, i * 2 + 1);
194 shuffles2[i] = lp_build_const_int32(gallivm, i * 2);
195 #endif
196 }
197
198 split_values[0] = LLVMBuildShuffleVector(builder, value,
199 LLVMGetUndef(LLVMTypeOf(value)),
200 LLVMConstVector(shuffles,
201 bld_base->base.type.length),
202 "");
203 split_values[1] = LLVMBuildShuffleVector(builder, value,
204 LLVMGetUndef(LLVMTypeOf(value)),
205 LLVMConstVector(shuffles2,
206 bld_base->base.type.length),
207 "");
208 }
209
210 static void
emit_store_64bit_chan(struct lp_build_nir_context * bld_base,LLVMValueRef chan_ptr,LLVMValueRef chan_ptr2,LLVMValueRef value)211 emit_store_64bit_chan(struct lp_build_nir_context *bld_base,
212 LLVMValueRef chan_ptr,
213 LLVMValueRef chan_ptr2,
214 LLVMValueRef value)
215 {
216 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
217 struct lp_build_context *float_bld = &bld_base->base;
218 LLVMValueRef split_vals[2];
219
220 emit_store_64bit_split(bld_base, value, split_vals);
221
222 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[0], chan_ptr);
223 lp_exec_mask_store(&bld->exec_mask, float_bld, split_vals[1], chan_ptr2);
224 }
225
226 static LLVMValueRef
get_soa_array_offsets(struct lp_build_context * uint_bld,LLVMValueRef indirect_index,int num_components,unsigned chan_index,bool need_perelement_offset)227 get_soa_array_offsets(struct lp_build_context *uint_bld,
228 LLVMValueRef indirect_index,
229 int num_components,
230 unsigned chan_index,
231 bool need_perelement_offset)
232 {
233 struct gallivm_state *gallivm = uint_bld->gallivm;
234 LLVMValueRef chan_vec =
235 lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index);
236 LLVMValueRef length_vec =
237 lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length);
238 LLVMValueRef index_vec;
239
240 /* index_vec = (indirect_index * num_components + chan_index) * length + offsets */
241 index_vec = lp_build_mul(uint_bld, indirect_index, lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, num_components));
242 index_vec = lp_build_add(uint_bld, index_vec, chan_vec);
243 index_vec = lp_build_mul(uint_bld, index_vec, length_vec);
244
245 if (need_perelement_offset) {
246 LLVMValueRef pixel_offsets;
247 unsigned i;
248 /* build pixel offset vector: {0, 1, 2, 3, ...} */
249 pixel_offsets = uint_bld->undef;
250 for (i = 0; i < uint_bld->type.length; i++) {
251 LLVMValueRef ii = lp_build_const_int32(gallivm, i);
252 pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets,
253 ii, ii, "");
254 }
255 index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets);
256 }
257 return index_vec;
258 }
259
/**
 * Gather scalar values from base_ptr at the given per-lane indexes into
 * one result vector.
 *
 * When indexes2 is non-NULL the result is a double-wide vector whose
 * even elements are fetched via 'indexes' and odd elements via
 * 'indexes2' (used when 64-bit data is handled as two 32-bit halves).
 * Lanes flagged in overflow_mask fetch from index 0 and are then zeroed
 * in the result.
 */
static LLVMValueRef
build_gather(struct lp_build_nir_context *bld_base,
             struct lp_build_context *bld,
             LLVMTypeRef base_type,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask,
             LLVMValueRef indexes2)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef res;
   unsigned i;

   if (indexes2)
      res = LLVMGetUndef(LLVMVectorType(LLVMFloatTypeInContext(gallivm->context), bld_base->base.type.length * 2));
   else
      res = bld->undef;
   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
      if (indexes2)
         indexes2 = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes2);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length * (indexes2 ? 2 : 1); i++) {
      LLVMValueRef si, di;
      LLVMValueRef index;
      LLVMValueRef scalar_ptr, scalar;

      /* di indexes the destination vector; si indexes the (half-rate
       * when indexes2 is used) source index vectors. */
      di = lp_build_const_int32(gallivm, i);
      if (indexes2)
         si = lp_build_const_int32(gallivm, i >> 1);
      else
         si = di;

      /* Odd destination elements come from indexes2, even from indexes. */
      if (indexes2 && (i & 1)) {
         index = LLVMBuildExtractElement(builder,
                                         indexes2, si, "");
      } else {
         index = LLVMBuildExtractElement(builder,
                                         indexes, si, "");
      }

      scalar_ptr = LLVMBuildGEP2(builder, base_type, base_ptr, &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad2(builder, base_type, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, di, "");
   }

   if (overflow_mask) {
      /* Zero the overflowed lanes; in the double-wide case widen the
       * mask to 64-bit lanes first. */
      if (indexes2) {
         res = LLVMBuildBitCast(builder, res, bld_base->dbl_bld.vec_type, "");
         overflow_mask = LLVMBuildSExt(builder, overflow_mask,
                                       bld_base->dbl_bld.int_vec_type, "");
         res = lp_build_select(&bld_base->dbl_bld, overflow_mask,
                               bld_base->dbl_bld.zero, res);
      } else
         res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
344
/**
 * Scatter/store vector: write each element of 'values' to
 * base_ptr[indexes[i]], element by element, honouring 'mask' when it is
 * active.
 */
static void
emit_mask_scatter(struct lp_build_nir_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;
   /* Only predicate the stores when an execution mask is in effect. */
   LLVMValueRef pred = mask->has_mask ? mask->exec_mask : NULL;

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, LLVMTypeOf(val), base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* Disabled debugging aid: dumps each scattered element. */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* Emulate a masked store: load the destination, select per the
          * predicate, store the blended result. */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad2(builder, LLVMTypeOf(val), scalar_ptr, "");
         scalar_pred = LLVMBuildTrunc(builder, scalar_pred, LLVMInt1TypeInContext(gallivm->context), "");
         real_val = LLVMBuildSelect(builder, scalar_pred, val, dst_val, "");
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
387
/* Load a shader input/output variable into 'result', one SoA vector per
 * component.  Dispatches on the stage-specific interface (gs/tes/tcs,
 * or the fs fb-fetch path for outputs); plain inputs are read from the
 * inputs array or input registers.  64-bit values are fetched as two
 * 32-bit halves and re-interleaved via emit_fetch_64bit(). */
static void emit_load_var(struct lp_build_nir_context *bld_base,
                          nir_variable_mode deref_mode,
                          unsigned num_components,
                          unsigned bit_size,
                          nir_variable *var,
                          unsigned vertex_index,
                          LLVMValueRef indir_vertex_index,
                          unsigned const_index,
                          LLVMValueRef indir_index,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* A 64-bit component occupies two 32-bit slots. */
   int dmul = bit_size == 64 ? 2 : 1;
   unsigned location = var->data.driver_location;
   unsigned location_frac = var->data.location_frac;

   /* Fold the constant part of the index into location; compact vars
    * pack four scalars per location. */
   if (!var->data.compact && !indir_index)
      location += const_index;
   else if (var->data.compact) {
      location += const_index / 4;
      location_frac += const_index % 4;
      const_index = 0;
   }
   switch (deref_mode) {
   case nir_var_shader_in:
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         int comp_loc = location;

         /* 64-bit components stepping past the vec4 boundary move to
          * the next location. */
         if (bit_size == 64 && idx >= 4) {
            comp_loc++;
            idx = idx % 4;
         }

         if (bld->gs_iface) {
            /* Geometry shader: fetch per-vertex input via the GS iface;
             * 64-bit values need a second fetch of the adjacent slot. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            result[i] = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                   false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               result2 = bld->gs_iface->fetch_input(bld->gs_iface, &bld_base->base,
                                                    false, vertex_index_val, 0, attrib_index_val, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else if (bld->tes_iface) {
            /* Tess-eval: patch inputs vs per-vertex inputs take
             * different fetch hooks. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);
            LLVMValueRef result2;

            if (indir_index) {
               if (var->data.compact) {
                  /* Compact arrays are indirectly indexed within the
                   * swizzle, not the attribute. */
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);

            if (var->data.patch) {
               result[i] = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                             indir_index ? true : false, attrib_index_val, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_patch_input(bld->tes_iface, &bld_base->base,
                                                              indir_index ? true : false, attrib_index_val, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
            else {
               result[i] = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                              indir_vertex_index ? true : false,
                                                              indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                              (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                              (indir_index && var->data.compact) ? true : false, swizzle_index_val);
               if (bit_size == 64) {
                  LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
                  result2 = bld->tes_iface->fetch_vertex_input(bld->tes_iface, &bld_base->base,
                                                               indir_vertex_index ? true : false,
                                                               indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                               indir_index ? true : false, attrib_index_val, false, swizzle_index_val);
                  result[i] = emit_fetch_64bit(bld_base, result[i], result2);
               }
            }
         } else if (bld->tcs_iface) {
            /* Tess-control: same attribute/swizzle indexing scheme as
             * the TES vertex path. */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index) {
               if (var->data.compact) {
                  swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, idx));
                  attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
               } else
                  attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
            } else
               attrib_index_val = lp_build_const_int32(gallivm, comp_loc);
            result[i] = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                         indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                         (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                         (indir_index && var->data.compact) ? true : false, swizzle_index_val);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_input(bld->tcs_iface, &bld_base->base,
                                                                       indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                       indir_index ? true : false, attrib_index_val,
                                                                       false, swizzle_index_val);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         } else {
            /* No stage interface: read the inputs array directly. */
            if (indir_index) {
               LLVMValueRef attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, comp_loc));
               LLVMValueRef index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                                              attrib_index_val, 4, idx,
                                                              true);
               LLVMValueRef index_vec2 = NULL;
               LLVMTypeRef scalar_type = LLVMFloatTypeInContext(gallivm->context);
               LLVMValueRef inputs_array = LLVMBuildBitCast(gallivm->builder, bld->inputs_array, LLVMPointerType(scalar_type, 0), "");

               /* NOTE(review): index_vec2 is built from the raw
                * indir_index, without the comp_loc bias applied to
                * index_vec above — confirm this asymmetry is intended. */
               if (bit_size == 64)
                  index_vec2 = get_soa_array_offsets(&bld_base->uint_bld,
                                                     indir_index, 4, idx + 1, true);

               /* Gather values from the input register array */
               result[i] = build_gather(bld_base, &bld_base->base, scalar_type, inputs_array, index_vec, NULL, index_vec2);
            } else {
               if (bld->indirects & nir_var_shader_in) {
                  /* Inputs live in memory (inputs_array); load by
                   * linear index location*4 + channel. */
                  LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                                             comp_loc * 4 + idx);
                  LLVMValueRef input_ptr = lp_build_pointer_get2(gallivm->builder,
                                                                 bld->bld_base.base.vec_type,
                                                                 bld->inputs_array, lindex);
                  if (bit_size == 64) {
                     LLVMValueRef lindex2 = lp_build_const_int32(gallivm,
                                                                 comp_loc * 4 + (idx + 1));
                     LLVMValueRef input_ptr2 = lp_build_pointer_get2(gallivm->builder,
                                                                     bld->bld_base.base.vec_type,
                                                                     bld->inputs_array, lindex2);
                     result[i] = emit_fetch_64bit(bld_base, input_ptr, input_ptr2);
                  } else {
                     result[i] = input_ptr;
                  }
               } else {
                  /* Inputs are plain SSA values in bld->inputs. */
                  if (bit_size == 64) {
                     LLVMValueRef tmp[2];
                     tmp[0] = bld->inputs[comp_loc][idx];
                     tmp[1] = bld->inputs[comp_loc][idx + 1];
                     result[i] = emit_fetch_64bit(bld_base, tmp[0], tmp[1]);
                  } else {
                     result[i] = bld->inputs[comp_loc][idx];
                  }
               }
            }
         }
      }
      break;
   case nir_var_shader_out:
      /* Fragment shaders can read back framebuffer outputs. */
      if (bld->fs_iface && bld->fs_iface->fb_fetch) {
         bld->fs_iface->fb_fetch(bld->fs_iface, &bld_base->base, var->data.location, result);
         return;
      }
      for (unsigned i = 0; i < num_components; i++) {
         int idx = (i * dmul) + location_frac;
         if (bld->tcs_iface) {
            /* TCS may read its own outputs (per-patch communication). */
            LLVMValueRef vertex_index_val = lp_build_const_int32(gallivm, vertex_index);
            LLVMValueRef attrib_index_val;
            LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx);

            if (indir_index)
               attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, var->data.driver_location));
            else
               attrib_index_val = lp_build_const_int32(gallivm, location);

            result[i] = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                          indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                          (indir_index && !var->data.compact) ? true : false, attrib_index_val,
                                                          (indir_index && var->data.compact) ? true : false, swizzle_index_val, 0);
            if (bit_size == 64) {
               LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, idx + 1);
               LLVMValueRef result2 = bld->tcs_iface->emit_fetch_output(bld->tcs_iface, &bld_base->base,
                                                                        indir_vertex_index ? true : false, indir_vertex_index ? indir_vertex_index : vertex_index_val,
                                                                        indir_index ? true : false, attrib_index_val,
                                                                        false, swizzle_index_val, 0);
               result[i] = emit_fetch_64bit(bld_base, result[i], result2);
            }
         }
      }
      break;
   default:
      break;
   }
}
585
/* Store one output channel into bld->outputs.  A 64-bit value occupies
 * two consecutive 32-bit slots and may spill into the next location;
 * 32-bit values are bitcast to float and stored under the exec mask. */
static void emit_store_chan(struct lp_build_nir_context *bld_base,
                            nir_variable_mode deref_mode,
                            unsigned bit_size,
                            unsigned location, unsigned comp,
                            unsigned chan,
                            LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *float_bld = &bld_base->base;

   if (bit_size == 64) {
      unsigned slot = chan * 2 + comp;
      unsigned out_loc = location;
      if (slot >= 4) {
         slot -= 4;
         out_loc++;
      }
      emit_store_64bit_chan(bld_base, bld->outputs[out_loc][slot],
                            bld->outputs[out_loc][slot + 1], dst);
   } else {
      dst = LLVMBuildBitCast(builder, dst, float_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, float_bld, dst,
                         bld->outputs[location][chan + comp]);
   }
}
612
/* Store one output channel through the TCS (or mesh) interface,
 * translating (location, comp, chan) into attribute/swizzle indices.
 * 64-bit values are split into two 32-bit halves and stored with two
 * calls; all stores carry the current exec mask. */
static void emit_store_tcs_chan(struct lp_build_nir_context *bld_base,
                                bool is_compact,
                                unsigned bit_size,
                                unsigned location,
                                unsigned const_index,
                                LLVMValueRef indir_vertex_index,
                                LLVMValueRef indir_index,
                                unsigned comp,
                                unsigned chan,
                                LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   if (bit_size == 64) {
      /* Two 32-bit slots per 64-bit channel; wrap into the next
       * location when stepping past the vec4 boundary. */
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   if (indir_index) {
      if (is_compact) {
         /* Compact arrays apply the indirect index to the swizzle. */
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, const_index + location);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   if (bit_size == 64) {
      /* Split the 64-bit value and emit two 32-bit masked stores to
       * consecutive swizzle slots. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      if (bld->mesh_iface) {
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index ? true : false,
                                            attrib_index_val,
                                            false, swizzle_index_val,
                                            split_vals[0], exec_mask);
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index ? true : false,
                                            attrib_index_val,
                                            false, swizzle_index_val2,
                                            split_vals[1], exec_mask);
      } else {
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index ? true : false,
                                           attrib_index_val,
                                           false, swizzle_index_val,
                                           split_vals[0], exec_mask);
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index ? true : false,
                                           attrib_index_val,
                                           false, swizzle_index_val2,
                                           split_vals[1], exec_mask);
      }
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      if (bld->mesh_iface) {
         bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                            indir_vertex_index ? true : false,
                                            indir_vertex_index,
                                            indir_index && !is_compact ? true : false,
                                            attrib_index_val,
                                            indir_index && is_compact ? true : false,
                                            swizzle_index_val,
                                            chan_val, exec_mask);
      } else {
         bld->tcs_iface->emit_store_output(bld->tcs_iface, &bld_base->base, 0,
                                           indir_vertex_index ? true : false,
                                           indir_vertex_index,
                                           indir_index && !is_compact ? true : false,
                                           attrib_index_val,
                                           indir_index && is_compact ? true : false,
                                           swizzle_index_val,
                                           chan_val, exec_mask);
      }
   }
}
707
/* Store one output channel through the mesh-shader interface.  Like
 * emit_store_tcs_chan but mesh-only; note the 64-bit path folds
 * const_index into the swizzle before doubling, whereas the TCS variant
 * adds it to the attribute index — NOTE(review): confirm this
 * difference is intended. */
static void emit_store_mesh_chan(struct lp_build_nir_context *bld_base,
                                 bool is_compact,
                                 unsigned bit_size,
                                 unsigned location,
                                 unsigned const_index,
                                 LLVMValueRef indir_vertex_index,
                                 LLVMValueRef indir_index,
                                 unsigned comp,
                                 unsigned chan,
                                 LLVMValueRef chan_val)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned swizzle = chan;
   if (bit_size == 64) {
      /* Two 32-bit slots per 64-bit channel; wrap into the next
       * location past the vec4 boundary. */
      swizzle += const_index;
      swizzle *= 2;
      swizzle += comp;
      if (swizzle >= 4) {
         swizzle -= 4;
         location++;
      }
   } else
      swizzle += comp;
   LLVMValueRef attrib_index_val;
   LLVMValueRef swizzle_index_val = lp_build_const_int32(gallivm, swizzle);

   if (indir_index) {
      if (is_compact) {
         /* Compact arrays apply the indirect index to the swizzle. */
         swizzle_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, swizzle));
         attrib_index_val = lp_build_const_int32(gallivm, location);
      } else
         attrib_index_val = lp_build_add(&bld_base->uint_bld, indir_index, lp_build_const_int_vec(gallivm, bld_base->uint_bld.type, location));
   } else
      attrib_index_val = lp_build_const_int32(gallivm, location + const_index);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   if (bit_size == 64) {
      /* Split into 32-bit halves and store to consecutive swizzles. */
      LLVMValueRef split_vals[2];
      LLVMValueRef swizzle_index_val2 = lp_build_const_int32(gallivm, swizzle + 1);
      emit_store_64bit_split(bld_base, chan_val, split_vals);
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
                                         attrib_index_val,
                                         false, swizzle_index_val,
                                         split_vals[0], exec_mask);
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index ? true : false,
                                         attrib_index_val,
                                         false, swizzle_index_val2,
                                         split_vals[1], exec_mask);
   } else {
      chan_val = LLVMBuildBitCast(builder, chan_val, bld_base->base.vec_type, "");
      bld->mesh_iface->emit_store_output(bld->mesh_iface, &bld_base->base, 0,
                                         indir_vertex_index ? true : false,
                                         indir_vertex_index,
                                         indir_index && !is_compact ? true : false,
                                         attrib_index_val,
                                         indir_index && is_compact ? true : false,
                                         swizzle_index_val,
                                         chan_val, exec_mask);
   }
}
775
/* Store 'dst' into an output variable, channel by channel according to
 * writemask.  Routes to the mesh or TCS interface when one is present,
 * otherwise writes the output register array via emit_store_chan(). */
static void emit_store_var(struct lp_build_nir_context *bld_base,
                           nir_variable_mode deref_mode,
                           unsigned num_components,
                           unsigned bit_size,
                           nir_variable *var,
                           unsigned writemask,
                           LLVMValueRef indir_vertex_index,
                           unsigned const_index,
                           LLVMValueRef indir_index,
                           LLVMValueRef dst)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   switch (deref_mode) {
   case nir_var_shader_out: {
      unsigned location = var->data.driver_location;
      unsigned comp = var->data.location_frac;
      /* Fragment stencil/depth results go to fixed channels (1 and 2)
       * of their output slot. */
      if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
         if (var->data.location == FRAG_RESULT_STENCIL)
            comp = 1;
         else if (var->data.location == FRAG_RESULT_DEPTH)
            comp = 2;
      }

      /* Compact vars pack four scalars per location; fold the constant
       * index in now. */
      if (var->data.compact) {
         location += const_index / 4;
         comp += const_index % 4;
         const_index = 0;
      }

      for (unsigned chan = 0; chan < num_components; chan++) {
         if (writemask & (1u << chan)) {
            /* Scalar stores take dst directly; vectors extract chan. */
            LLVMValueRef chan_val = (num_components == 1) ? dst : LLVMBuildExtractValue(builder, dst, chan, "");
            if (bld->mesh_iface) {
               emit_store_mesh_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
            } else if (bld->tcs_iface) {
               emit_store_tcs_chan(bld_base, var->data.compact, bit_size, location, const_index, indir_vertex_index, indir_index, comp, chan, chan_val);
            } else
               emit_store_chan(bld_base, deref_mode, bit_size, location + const_index, comp, chan, chan_val);
         }
      }
      break;
   }
   default:
      break;
   }
}
823
824 /**
825 * Returns the address of the given constant array index and channel in a
826 * nir register.
827 */
reg_chan_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * reg_bld,const nir_intrinsic_instr * decl,LLVMValueRef reg_storage,int array_index,int chan)828 static LLVMValueRef reg_chan_pointer(struct lp_build_nir_context *bld_base,
829 struct lp_build_context *reg_bld,
830 const nir_intrinsic_instr *decl,
831 LLVMValueRef reg_storage,
832 int array_index, int chan)
833 {
834 struct gallivm_state *gallivm = bld_base->base.gallivm;
835 int nc = nir_intrinsic_num_components(decl);
836 int num_array_elems = nir_intrinsic_num_array_elems(decl);
837
838 LLVMTypeRef chan_type = reg_bld->vec_type;
839 if (nc > 1)
840 chan_type = LLVMArrayType(chan_type, nc);
841
842 if (num_array_elems > 0) {
843 LLVMTypeRef array_type = LLVMArrayType(chan_type, num_array_elems);
844 reg_storage = lp_build_array_get_ptr2(gallivm, array_type, reg_storage,
845 lp_build_const_int32(gallivm, array_index));
846 }
847 if (nc > 1) {
848 reg_storage = lp_build_array_get_ptr2(gallivm, chan_type, reg_storage,
849 lp_build_const_int32(gallivm, chan));
850 }
851
852 return reg_storage;
853 }
854
/**
 * Load a NIR register: directly for constant indices, or via a clamped
 * per-invocation gather when an indirect source is present.
 */
static LLVMValueRef emit_load_reg(struct lp_build_nir_context *bld_base,
                                  struct lp_build_context *reg_bld,
                                  const nir_intrinsic_instr *decl,
                                  unsigned base,
                                  LLVMValueRef indir_src,
                                  LLVMValueRef reg_storage)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const int nc = nir_intrinsic_num_components(decl);
   const int num_array_elems = nir_intrinsic_num_array_elems(decl);
   LLVMValueRef vals[NIR_MAX_VEC_COMPONENTS] = { NULL };

   if (indir_src) {
      /* Clamp base+indirect to the last element, then gather each channel
       * through SoA offsets.
       */
      LLVMValueRef index =
         LLVMBuildAdd(builder,
                      lp_build_const_int_vec(gallivm, uint_bld->type, base),
                      indir_src, "");
      index = lp_build_min(uint_bld, index,
                           lp_build_const_int_vec(gallivm, uint_bld->type,
                                                  num_array_elems - 1));
      reg_storage = LLVMBuildBitCast(builder, reg_storage,
                                     LLVMPointerType(reg_bld->elem_type, 0), "");
      for (int i = 0; i < nc; i++) {
         LLVMValueRef offsets =
            get_soa_array_offsets(uint_bld, index, nc, i, true);
         vals[i] = build_gather(bld_base, reg_bld, reg_bld->elem_type,
                                reg_storage, offsets, NULL, NULL);
      }
   } else {
      for (int i = 0; i < nc; i++)
         vals[i] = LLVMBuildLoad2(builder, reg_bld->vec_type,
                                  reg_chan_pointer(bld_base, reg_bld, decl,
                                                   reg_storage, base, i), "");
   }

   if (nc == 1)
      return vals[0];
   return lp_nir_array_build_gather_values(builder, vals, nc);
}
887
/**
 * Store channels of a NIR register under the current execution mask.
 *
 * Indirect stores clamp the per-invocation index and scatter; direct stores
 * use a masked store per written channel.
 */
static void emit_store_reg(struct lp_build_nir_context *bld_base,
                           struct lp_build_context *reg_bld,
                           const nir_intrinsic_instr *decl,
                           unsigned writemask,
                           unsigned base,
                           LLVMValueRef indir_src,
                           LLVMValueRef reg_storage,
                           LLVMValueRef dst[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const int nc = nir_intrinsic_num_components(decl);
   const int num_array_elems = nir_intrinsic_num_array_elems(decl);

   if (indir_src) {
      LLVMValueRef index =
         LLVMBuildAdd(builder,
                      lp_build_const_int_vec(gallivm, uint_bld->type, base),
                      indir_src, "");
      index = lp_build_min(uint_bld, index,
                           lp_build_const_int_vec(gallivm, uint_bld->type,
                                                  num_array_elems - 1));
      reg_storage = LLVMBuildBitCast(builder, reg_storage,
                                     LLVMPointerType(reg_bld->elem_type, 0), "");
      for (int i = 0; i < nc; i++) {
         if (!(writemask & (1u << i)))
            continue;
         LLVMValueRef offsets =
            get_soa_array_offsets(uint_bld, index, nc, i, true);
         dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
         emit_mask_scatter(bld, reg_storage, offsets, dst[i], &bld->exec_mask);
      }
      return;
   }

   for (int i = 0; i < nc; i++) {
      if (!(writemask & (1u << i)))
         continue;
      dst[i] = LLVMBuildBitCast(builder, dst[i], reg_bld->vec_type, "");
      lp_exec_mask_store(&bld->exec_mask, reg_bld, dst[i],
                         reg_chan_pointer(bld_base, reg_bld, decl,
                                          reg_storage, base, i));
   }
}
928
/**
 * Load OpenCL kernel arguments from the kernel-args buffer.
 *
 * The offset must be dynamically uniform; each channel is loaded as a scalar
 * through the first active invocation's offset and broadcast to all lanes.
 */
static void emit_load_kernel_arg(struct lp_build_nir_context *bld_base,
                                 unsigned nc,
                                 unsigned bit_size,
                                 unsigned offset_bit_size,
                                 bool offset_is_uniform,
                                 LLVMValueRef offset,
                                 LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   struct lp_build_context *bld_offset = get_int_bld(bld_base, true, offset_bit_size);
   const unsigned size_shift = bit_size_to_shift_size(bit_size);

   /* Convert the byte offset into an element index. */
   if (size_shift)
      offset = lp_build_shr(bld_offset, offset,
                            lp_build_const_int_vec(gallivm, bld_offset->type,
                                                   size_shift));

   LLVMValueRef args_ptr =
      LLVMBuildBitCast(builder, bld->kernel_args_ptr,
                       LLVMPointerType(bld_broad->elem_type, 0), "");

   if (!offset_is_uniform)
      unreachable("load_kernel_arg must have a uniform offset.");

   offset = LLVMBuildExtractElement(builder, offset,
                                    first_active_invocation(bld_base), "");
   for (unsigned c = 0; c < nc; c++) {
      /* Channel index constant must match the offset's width. */
      LLVMValueRef chan = offset_bit_size == 64 ?
         lp_build_const_int64(gallivm, c) : lp_build_const_int32(gallivm, c);
      LLVMValueRef this_offset = LLVMBuildAdd(builder, offset, chan, "");
      LLVMValueRef scalar = lp_build_pointer_get2(builder, bld_broad->elem_type,
                                                  args_ptr, this_offset);
      result[c] = lp_build_broadcast_scalar(bld_broad, scalar);
   }
}
963
/**
 * Cast a scalar address integer to a pointer to an integer of the given bit
 * width (unrecognized sizes fall back to 32 bits).
 */
static LLVMValueRef global_addr_to_ptr(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned bit_size)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef elem_type;

   switch (bit_size) {
   case 8:
      elem_type = LLVMInt8TypeInContext(gallivm->context);
      break;
   case 16:
      elem_type = LLVMInt16TypeInContext(gallivm->context);
      break;
   case 64:
      elem_type = LLVMInt64TypeInContext(gallivm->context);
      break;
   case 32:
   default:
      elem_type = LLVMInt32TypeInContext(gallivm->context);
      break;
   }

   return LLVMBuildIntToPtr(builder, addr_ptr,
                            LLVMPointerType(elem_type, 0), "");
}
984
/**
 * Cast a vector of address integers to a vector of pointers to integers of
 * the given bit width (unrecognized sizes fall back to 32 bits).
 */
static LLVMValueRef global_addr_to_ptr_vec(struct gallivm_state *gallivm, LLVMValueRef addr_ptr, unsigned length, unsigned bit_size)
{
   LLVMBuilderRef builder = gallivm->builder;
   LLVMTypeRef elem_type;

   switch (bit_size) {
   case 8:
      elem_type = LLVMInt8TypeInContext(gallivm->context);
      break;
   case 16:
      elem_type = LLVMInt16TypeInContext(gallivm->context);
      break;
   case 64:
      elem_type = LLVMInt64TypeInContext(gallivm->context);
      break;
   case 32:
   default:
      elem_type = LLVMInt32TypeInContext(gallivm->context);
      break;
   }

   LLVMTypeRef ptr_vec_type =
      LLVMVectorType(LLVMPointerType(elem_type, 0), length);
   return LLVMBuildIntToPtr(builder, addr_ptr, ptr_vec_type, "");
}
1005
/**
 * Add a vector of byte offsets to a vector of pointers, producing pointers
 * typed for a bit_size access.
 */
static LLVMValueRef lp_vec_add_offset_ptr(struct lp_build_nir_context *bld_base,
                                          unsigned bit_size,
                                          LLVMValueRef ptr,
                                          LLVMValueRef offset)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   const unsigned pointer_size = 8 * sizeof(void *);
   struct lp_build_context *ptr_bld = get_int_bld(bld_base, true, pointer_size);

   /* Do the addition in pointer-sized integers. */
   LLVMValueRef addrs = LLVMBuildPtrToInt(builder, ptr, ptr_bld->vec_type, "");
   if (pointer_size == 64)
      offset = LLVMBuildZExt(builder, offset, ptr_bld->vec_type, "");
   addrs = LLVMBuildAdd(builder, offset, addrs, "");

   return global_addr_to_ptr_vec(gallivm, addrs, uint_bld->type.length, bit_size);
}
1022
/**
 * Load from global memory: a single broadcast load when the address is
 * uniform and lane 0 is known active, otherwise a masked per-lane gather.
 */
static void emit_load_global(struct lp_build_nir_context *bld_base,
                             unsigned nc,
                             unsigned bit_size,
                             unsigned addr_bit_size,
                             bool offset_is_uniform,
                             LLVMValueRef addr,
                             LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *res_bld = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef exec_mask = mask_vec(bld_base);

   /* Note, we don't use first_active_invocation here, since we aren't
    * guaranteed that there is actually an active invocation.
    */
   if (offset_is_uniform && invocation_0_must_be_active(bld_base)) {
      /* Load through invocation 0's address and broadcast to every lane. */
      LLVMValueRef addr0 =
         LLVMBuildExtractElement(builder, addr,
                                 lp_build_const_int32(gallivm, 0), "");
      addr0 = global_addr_to_ptr(gallivm, addr0, bit_size);

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef scalar =
            lp_build_pointer_get2(builder, res_bld->elem_type, addr0,
                                  lp_build_const_int32(gallivm, c));
         outval[c] = lp_build_broadcast_scalar(res_bld, scalar);
      }
      return;
   }

   /* Divergent (or possibly all-inactive) case: masked gather per channel. */
   for (unsigned c = 0; c < nc; c++) {
      LLVMValueRef chan_offset =
         lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
      LLVMValueRef ptrs =
         lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset);

      outval[c] = lp_build_masked_gather(gallivm, res_bld->type.length,
                                         bit_size, res_bld->vec_type,
                                         ptrs, exec_mask);
      outval[c] = LLVMBuildBitCast(builder, outval[c], res_bld->vec_type, "");
   }
}
1069
/**
 * Store to global memory with a masked per-lane scatter, honoring the
 * channel writemask and the current execution mask.
 */
static void emit_store_global(struct lp_build_nir_context *bld_base,
                              unsigned writemask,
                              unsigned nc, unsigned bit_size,
                              unsigned addr_bit_size,
                              LLVMValueRef addr,
                              LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef exec_mask = mask_vec(bld_base);

   for (unsigned c = 0; c < nc; c++) {
      if (!(writemask & (1u << c)))
         continue;

      struct lp_build_context *out_bld = get_int_bld(bld_base, false, bit_size);
      LLVMValueRef val =
         nc == 1 ? dst : LLVMBuildExtractValue(builder, dst, c, "");
      val = LLVMBuildBitCast(builder, val, out_bld->vec_type, "");

      LLVMValueRef chan_offset =
         lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8));
      LLVMValueRef ptrs =
         lp_vec_add_offset_ptr(bld_base, bit_size, addr, chan_offset);

      lp_build_masked_scatter(gallivm, out_bld->type.length, bit_size,
                              ptrs, val, exec_mask);
   }
}
1096
/**
 * Emit a global-memory atomic operation.
 *
 * LLVM atomics are scalar, so we loop over the SIMD lanes: for each lane
 * active in the exec mask we extract the per-lane address and operand,
 * perform a scalar cmpxchg (when val2 is non-NULL) or atomicrmw, and insert
 * the old memory value into the result vector; inactive lanes receive zero.
 *
 * @param nir_op       the NIR atomic opcode (translated via lp_translate_atomic_op)
 * @param val_bit_size bit width of the operand/result
 * @param addr         per-lane address vector
 * @param val          per-lane operand (comparison value for cmpxchg)
 * @param val2         per-lane swap value for compare-and-swap, or NULL
 * @param result       receives the vector of pre-operation memory values
 */
static void emit_atomic_global(struct lp_build_nir_context *bld_base,
                               nir_atomic_op nir_op,
                               unsigned addr_bit_size,
                               unsigned val_bit_size,
                               LLVMValueRef addr,
                               LLVMValueRef val, LLVMValueRef val2,
                               LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   bool is_flt = nir_atomic_op_type(nir_op) == nir_type_float;
   struct lp_build_context *atom_bld = is_flt ? get_flt_bld(bld_base, val_bit_size) : get_int_bld(bld_base, true, val_bit_size);
   if (is_flt)
      val = LLVMBuildBitCast(builder, val, atom_bld->vec_type, "");

   /* Accumulates per-lane results across the loop iterations. */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           atom_bld->vec_type, "");
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   /* This lane's scalar operand. */
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                    loop_state.counter, "");
   value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atom_bld->elem_type, "");

   /* This lane's scalar address, initially cast as an i32 pointer and
    * re-typed to the operand type below.
    */
   LLVMValueRef addr_ptr = LLVMBuildExtractElement(gallivm->builder, addr,
                                                   loop_state.counter, "");
   addr_ptr = global_addr_to_ptr(gallivm, addr_ptr, 32);
   struct lp_build_if_state ifthen;
   LLVMValueRef cond, temp_res;
   LLVMValueRef scalar;
   /* Only perform the atomic for lanes active in the exec mask. */
   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   addr_ptr = LLVMBuildBitCast(gallivm->builder, addr_ptr, LLVMPointerType(LLVMTypeOf(value_ptr), 0), "");
   if (val2 != NULL /* compare-and-swap */) {
      LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                         loop_state.counter, "");
      cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atom_bld->elem_type, "");
      scalar = LLVMBuildAtomicCmpXchg(builder, addr_ptr, value_ptr,
                                      cas_src_ptr,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      LLVMAtomicOrderingSequentiallyConsistent,
                                      false);
      /* cmpxchg yields {old value, success flag}; keep the old value. */
      scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
   } else {
      scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
                                  addr_ptr, value_ptr,
                                  LLVMAtomicOrderingSequentiallyConsistent,
                                  false);
   }
   temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
   temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_else(&ifthen);
   /* Inactive lane: insert zero into its result slot. */
   temp_res = LLVMBuildLoad2(builder, atom_bld->vec_type, atom_res, "");
   LLVMValueRef zero_val = lp_build_zero_bits(gallivm, val_bit_size, is_flt);
   temp_res = LLVMBuildInsertElement(builder, temp_res, zero_val, loop_state.counter, "");
   LLVMBuildStore(builder, temp_res, atom_res);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, uint_bld->type.length),
                          NULL, LLVMIntUGE);
   *result = LLVMBuildLoad2(builder, LLVMTypeOf(val), atom_res, "");
}
1163
1164 /* Returns a boolean for whether the offset is in range of the given limit for
1165 * SSBO/UBO dereferences.
1166 */
1167 static LLVMValueRef
lp_offset_in_range(struct lp_build_nir_context * bld_base,LLVMValueRef offset,LLVMValueRef limit)1168 lp_offset_in_range(struct lp_build_nir_context *bld_base,
1169 LLVMValueRef offset,
1170 LLVMValueRef limit)
1171 {
1172 struct gallivm_state *gallivm = bld_base->base.gallivm;
1173 LLVMBuilderRef builder = gallivm->builder;
1174
1175 LLVMValueRef fetch_extent = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, 1), "");
1176 LLVMValueRef fetch_in_bounds = LLVMBuildICmp(gallivm->builder, LLVMIntUGE, limit, fetch_extent, "");
1177 LLVMValueRef fetch_non_negative = LLVMBuildICmp(gallivm->builder, LLVMIntSGE, offset, lp_build_const_int32(gallivm, 0), "");
1178 return LLVMBuildAnd(gallivm->builder, fetch_in_bounds, fetch_non_negative, "");
1179 }
1180
1181 static LLVMValueRef
build_resource_to_scalar(struct lp_build_nir_context * bld_base,LLVMValueRef resource)1182 build_resource_to_scalar(struct lp_build_nir_context *bld_base, LLVMValueRef resource)
1183 {
1184 struct gallivm_state *gallivm = bld_base->base.gallivm;
1185
1186 LLVMValueRef invocation = first_active_invocation(bld_base);
1187
1188 LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, resource, 0, "");
1189 set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1190
1191 LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, resource, 1, "");
1192 binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1193
1194 LLVMValueRef components[2] = { set, binding };
1195 return lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1196 }
1197
/**
 * Load constants from a UBO.
 *
 * @param nc                number of vector components to load
 * @param bit_size          component width in bits (8/16/32/64)
 * @param offset_is_uniform true when the byte offset is dynamically uniform
 * @param index             buffer index vector, or a (set, binding) aggregate
 * @param offset            per-invocation byte offset vector
 * @param result            receives one lane vector per component
 */
static void emit_load_ubo(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool offset_is_uniform,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   /* A (set, binding) aggregate is reduced to first-active-lane scalars. */
   if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind)
      index = build_resource_to_scalar(bld_base, index);

   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef consts_ptr = lp_llvm_buffer_base(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
   LLVMValueRef num_consts = lp_llvm_buffer_num_elements(gallivm, bld->consts_ptr, index, LP_MAX_TGSI_CONST_BUFFERS);
   unsigned size_shift = bit_size_to_shift_size(bit_size);
   /* Convert the byte offset into an element index. */
   if (size_shift)
      offset = lp_build_shr(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, size_shift));

   LLVMTypeRef ptr_type = LLVMPointerType(bld_broad->elem_type, 0);
   consts_ptr = LLVMBuildBitCast(builder, consts_ptr, ptr_type, "");

   if (offset_is_uniform) {
      /* Uniform offset: bounds-checked scalar loads through the first active
       * lane's offset, broadcast to all lanes.
       */
      offset = LLVMBuildExtractElement(builder, offset, first_active_invocation(bld_base), "");
      struct lp_build_context *load_bld = get_int_bld(bld_base, true, bit_size);
      /* Rescale the element count to bit_size-sized units (the shifts below
       * imply it is kept in 32-bit units -- NOTE(review): confirm against
       * lp_llvm_buffer_num_elements).
       */
      switch (bit_size) {
      case 8:
         num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 2), "");
         break;
      case 16:
         num_consts = LLVMBuildShl(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
         break;
      case 64:
         num_consts = LLVMBuildLShr(gallivm->builder, num_consts, lp_build_const_int32(gallivm, 1), "");
         break;
      default: break;
      }
      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar;
         /* If loading outside the UBO, we need to skip the load and read 0 instead. */
         LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
         LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
         LLVMBuildStore(builder, zero, res_store);

         struct lp_build_if_state ifthen;
         lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, num_consts));
         LLVMBuildStore(builder, lp_build_pointer_get2(builder, bld_broad->elem_type,
                                                       consts_ptr, chan_offset), res_store);
         lp_build_endif(&ifthen);

         scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");

         result[c] = lp_build_broadcast_scalar(load_bld, scalar);
      }
   } else {
      LLVMValueRef overflow_mask;

      /* Divergent offsets: per-lane gather, with an overflow mask marking
       * lanes whose offset is past the buffer's element count.
       */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      if (bit_size == 64)
         num_consts = lp_build_shr_imm(uint_bld, num_consts, 1);
      else if (bit_size == 16)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 1);
      else if (bit_size == 8)
         num_consts = lp_build_shl_imm(uint_bld, num_consts, 2);

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef this_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c));
         overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                          this_offset, num_consts);
         result[c] = build_gather(bld_base, bld_broad, bld_broad->elem_type, consts_ptr, this_offset, overflow_mask, NULL);
      }
   }
}
1276
1277 static void
emit_load_const(struct lp_build_nir_context * bld_base,const nir_load_const_instr * instr,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])1278 emit_load_const(struct lp_build_nir_context *bld_base,
1279 const nir_load_const_instr *instr,
1280 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
1281 {
1282 struct lp_build_context *int_bld = get_int_bld(bld_base, true, instr->def.bit_size);
1283 const unsigned bits = instr->def.bit_size;
1284
1285 for (unsigned i = 0; i < instr->def.num_components; i++) {
1286 outval[i] = lp_build_const_int_vec(bld_base->base.gallivm, int_bld->type,
1287 bits == 32 ? instr->value[i].u32
1288 : instr->value[i].u64);
1289 }
1290 for (unsigned i = instr->def.num_components; i < NIR_MAX_VEC_COMPONENTS; i++) {
1291 outval[i] = NULL;
1292 }
1293 }
1294
1295 /**
1296 * Get the base address of SSBO[@index] for the @invocation channel, returning
1297 * the address and also the bounds (in units of the bit_size).
1298 */
1299 static LLVMValueRef
ssbo_base_pointer(struct lp_build_nir_context * bld_base,unsigned bit_size,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1300 ssbo_base_pointer(struct lp_build_nir_context *bld_base,
1301 unsigned bit_size,
1302 LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1303 {
1304 struct gallivm_state *gallivm = bld_base->base.gallivm;
1305 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1306 uint32_t shift_val = bit_size_to_shift_size(bit_size);
1307
1308 LLVMValueRef ssbo_idx;
1309 LLVMValueRef buffers;
1310 uint32_t buffers_limit;
1311 if (LLVMGetTypeKind(LLVMTypeOf(index)) == LLVMArrayTypeKind) {
1312 LLVMValueRef set = LLVMBuildExtractValue(gallivm->builder, index, 0, "");
1313 set = LLVMBuildExtractElement(gallivm->builder, set, invocation, "");
1314
1315 LLVMValueRef binding = LLVMBuildExtractValue(gallivm->builder, index, 1, "");
1316 binding = LLVMBuildExtractElement(gallivm->builder, binding, invocation, "");
1317
1318 LLVMValueRef components[2] = { set, binding };
1319 ssbo_idx = lp_nir_array_build_gather_values(gallivm->builder, components, 2);
1320
1321 buffers = bld->consts_ptr;
1322 buffers_limit = LP_MAX_TGSI_CONST_BUFFERS;
1323 } else {
1324 ssbo_idx = LLVMBuildExtractElement(gallivm->builder, index, invocation, "");
1325
1326 buffers = bld->ssbo_ptr;
1327 buffers_limit = LP_MAX_TGSI_SHADER_BUFFERS;
1328 }
1329
1330 LLVMValueRef ssbo_size_ptr = lp_llvm_buffer_num_elements(gallivm, buffers, ssbo_idx, buffers_limit);
1331 LLVMValueRef ssbo_ptr = lp_llvm_buffer_base(gallivm, buffers, ssbo_idx, buffers_limit);
1332 if (bounds)
1333 *bounds = LLVMBuildAShr(gallivm->builder, ssbo_size_ptr, lp_build_const_int32(gallivm, shift_val), "");
1334
1335 return ssbo_ptr;
1336 }
1337
1338 static LLVMValueRef
mem_access_base_pointer(struct lp_build_nir_context * bld_base,struct lp_build_context * mem_bld,unsigned bit_size,bool payload,LLVMValueRef index,LLVMValueRef invocation,LLVMValueRef * bounds)1339 mem_access_base_pointer(struct lp_build_nir_context *bld_base,
1340 struct lp_build_context *mem_bld,
1341 unsigned bit_size, bool payload,
1342 LLVMValueRef index, LLVMValueRef invocation, LLVMValueRef *bounds)
1343 {
1344 struct gallivm_state *gallivm = bld_base->base.gallivm;
1345 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1346 LLVMValueRef ptr;
1347
1348 if (index) {
1349 ptr = ssbo_base_pointer(bld_base, bit_size, index, invocation, bounds);
1350 } else {
1351 if (payload) {
1352 ptr = bld->payload_ptr;
1353 ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
1354 ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 12), "");
1355 ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
1356 }
1357 else
1358 ptr = bld->shared_ptr;
1359 *bounds = NULL;
1360 }
1361
1362 /* Cast it to the pointer type of the access this instruction is doing. */
1363 if (bit_size == 32 && !mem_bld->type.floating)
1364 return ptr;
1365 else
1366 return LLVMBuildBitCast(gallivm->builder, ptr, LLVMPointerType(mem_bld->elem_type, 0), "");
1367 }
1368
/**
 * Load from SSBO (index != NULL) or shared/payload memory (index == NULL),
 * writing one lane vector per channel into outval.
 *
 * @param nc       number of components to load
 * @param bit_size component width in bits
 * @param index_and_offset_are_uniform  both are dynamically uniform
 * @param payload  use the task/mesh payload pointer instead of shared memory
 * @param offset   per-invocation byte offset vector
 */
static void emit_load_mem(struct lp_build_nir_context *bld_base,
                          unsigned nc,
                          unsigned bit_size,
                          bool index_and_offset_are_uniform,
                          bool payload,
                          LLVMValueRef index,
                          LLVMValueRef offset,
                          LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *load_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);

   load_bld = get_int_bld(bld_base, true, bit_size);

   /* Convert the byte offset into an element index. */
   offset = LLVMBuildAShr(gallivm->builder, offset, lp_build_const_int_vec(gallivm, uint_bld->type, shift_val), "");

   /* If the address is uniform, then use the address from the first active
    * invocation 0 to load, and broadcast to all invocations. We can't do
    * computed first active invocation for shared accesses (index == NULL),
    * though, since those don't do bounds checking and we could use an invalid
    * offset if exec_mask == 0.
    */
   if (index_and_offset_are_uniform && (invocation_0_must_be_active(bld_base) || index)) {
      LLVMValueRef ssbo_limit;
      LLVMValueRef first_active = first_active_invocation(bld_base);
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
                                                     first_active, &ssbo_limit);

      offset = LLVMBuildExtractElement(gallivm->builder, offset, first_active, "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         LLVMValueRef scalar;
         /* If loading outside the SSBO, we need to skip the load and read 0 instead. */
         if (ssbo_limit) {
            LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
            LLVMValueRef res_store = lp_build_alloca(gallivm, LLVMTypeOf(zero), "");
            LLVMBuildStore(builder, zero, res_store);

            struct lp_build_if_state ifthen;
            lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
            LLVMBuildStore(builder, lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset), res_store);
            lp_build_endif(&ifthen);

            scalar = LLVMBuildLoad2(builder, LLVMTypeOf(zero), res_store, "");
         } else {
            /* Shared/payload memory has no limit: load unconditionally. */
            scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, chan_offset);
         }

         outval[c] = lp_build_broadcast_scalar(load_bld, scalar);
      }
      return;
   }

   /* although the index is dynamically uniform that doesn't count if exec mask isn't set, so read the one-by-one */

   /* Per-channel result accumulators, filled lane by lane below. */
   LLVMValueRef result[NIR_MAX_VEC_COMPONENTS];
   for (unsigned c = 0; c < nc; c++)
      result[c] = lp_build_alloca(gallivm, load_bld->vec_type, "");

   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   for (unsigned i = 0; i < uint_bld->type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);
      LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");

      /* Skip this lane entirely when its exec bit is clear. */
      struct lp_build_if_state exec_ifthen;
      lp_build_if(&exec_ifthen, gallivm, loop_cond);

      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, load_bld, bit_size, payload, index,
                                                     counter, &ssbo_limit);

      LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");

      for (unsigned c = 0; c < nc; c++) {
         LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
         LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
         /* Bounds-check SSBO accesses; shared/payload have no limit. */
         if (ssbo_limit) {
            LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
            do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
         }

         struct lp_build_if_state ifthen;
         LLVMValueRef fetch_cond, temp_res;

         fetch_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");

         lp_build_if(&ifthen, gallivm, fetch_cond);
         LLVMValueRef scalar = lp_build_pointer_get2(builder, load_bld->elem_type, mem_ptr, loop_index);

         temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
         temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
         LLVMBuildStore(builder, temp_res, result[c]);
         lp_build_else(&ifthen);
         /* Out-of-bounds lane: insert zero. */
         temp_res = LLVMBuildLoad2(builder, load_bld->vec_type, result[c], "");
         LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, false);
         temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
         LLVMBuildStore(builder, temp_res, result[c]);
         lp_build_endif(&ifthen);
      }

      lp_build_endif(&exec_ifthen);
   }
   for (unsigned c = 0; c < nc; c++)
      outval[c] = LLVMBuildLoad2(gallivm->builder, load_bld->vec_type, result[c], "");

}
1482
/* Store an nc-component value (dst) to SSBO / shared-memory / task-payload
 * storage.  'offset' is a per-lane byte offset, 'index' selects the buffer,
 * and channels not set in 'writemask' are skipped.  bit_size selects the
 * element width (8/16/32/64).
 */
static void emit_store_mem(struct lp_build_nir_context *bld_base,
                           unsigned writemask,
                           unsigned nc,
                           unsigned bit_size,
                           bool index_and_offset_are_uniform,
                           bool payload,
                           LLVMValueRef index,
                           LLVMValueRef offset,
                           LLVMValueRef dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *store_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   store_bld = get_int_bld(bld_base, true, bit_size);

   /* Convert the byte offset into an index of bit_size-sized elements. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);

   /* If the address is uniform, then just store the value from the first
    * channel instead of making LLVM unroll the invocation loop. Note that we
    * don't use first_active_uniform(), since we aren't guaranteed that there is
    * actually an active invocation.
    */
   if (index_and_offset_are_uniform && invocation_0_must_be_active(bld_base)) {
      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
                                                     lp_build_const_int32(gallivm, 0), &ssbo_limit);

      /* Scalarize the (uniform) offset by taking lane 0's value. */
      offset = LLVMBuildExtractElement(gallivm->builder, offset, lp_build_const_int32(gallivm, 0), "");

      for (unsigned c = 0; c < nc; c++) {
         if (!(writemask & (1u << c)))
            continue;

         /* Pick out invocation 0's value. */
         LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                          lp_build_const_int32(gallivm, 0), "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");

         LLVMValueRef chan_offset = LLVMBuildAdd(builder, offset, lp_build_const_int32(gallivm, c), "");

         /* If storing outside the SSBO, we need to skip the store instead. */
         if (ssbo_limit) {
            struct lp_build_if_state ifthen;
            lp_build_if(&ifthen, gallivm, lp_offset_in_range(bld_base, chan_offset, ssbo_limit));
            lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
            lp_build_endif(&ifthen);
         } else {
            /* No limit (e.g. shared memory) — store unconditionally. */
            lp_build_pointer_set(builder, mem_ptr, chan_offset, value_ptr);
         }
      }
      return;
   }

   /* Non-uniform path: scalarize over the SIMD lanes, storing each active
    * lane's value one element at a time under an exec-mask guard.
    */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   for (unsigned i = 0; i < uint_bld->type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);
      LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");

      /* Skip this lane entirely if its exec-mask bit is clear. */
      struct lp_build_if_state exec_ifthen;
      lp_build_if(&exec_ifthen, gallivm, loop_cond);

      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, store_bld, bit_size, payload, index,
                                                     counter, &ssbo_limit);

      LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");

      for (unsigned c = 0; c < nc; c++) {
         if (!(writemask & (1u << c)))
            continue;
         LLVMValueRef loop_index = LLVMBuildAdd(builder, loop_offset, lp_build_const_int32(gallivm, c), "");
         LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
         /* do_store starts all-ones and is ANDed with the bounds check. */
         LLVMValueRef do_store = lp_build_const_int32(gallivm, -1);

         if (ssbo_limit) {
            LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_index, ssbo_limit);
            do_store = LLVMBuildAnd(builder, do_store, ssbo_oob_cmp, "");
         }

         LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                          counter, "");
         value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, store_bld->elem_type, "");
         struct lp_build_if_state ifthen;
         LLVMValueRef store_cond;

         /* Only perform the store when in bounds. */
         store_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_store, lp_build_const_int32(gallivm, 0), "");
         lp_build_if(&ifthen, gallivm, store_cond);
         lp_build_pointer_set(builder, mem_ptr, loop_index, value_ptr);
         lp_build_endif(&ifthen);
      }

      lp_build_endif(&exec_ifthen);
   }
}
1582
1583
/* Perform a memory atomic on SSBO / shared-memory / task-payload storage.
 * Atomics must be scalarized: we loop over the SIMD lanes and issue one
 * scalar atomicrmw (or cmpxchg when val2 is non-NULL) per active, in-bounds
 * lane.  Inactive or out-of-bounds lanes produce 0 in the returned vector.
 */
static void emit_atomic_mem(struct lp_build_nir_context *bld_base,
                            nir_atomic_op nir_op,
                            uint32_t bit_size,
                            bool payload,
                            LLVMValueRef index, LLVMValueRef offset,
                            LLVMValueRef val, LLVMValueRef val2,
                            LLVMValueRef *result)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   uint32_t shift_val = bit_size_to_shift_size(bit_size);
   bool is_float = nir_atomic_op_type(nir_op) == nir_type_float;
   struct lp_build_context *atomic_bld = is_float ? get_flt_bld(bld_base, bit_size) : get_int_bld(bld_base, true, bit_size);

   /* Convert the byte offset into an index of bit_size-sized elements. */
   offset = lp_build_shr_imm(uint_bld, offset, shift_val);
   /* Accumulate per-lane results into this alloca; loaded once at the end. */
   LLVMValueRef atom_res = lp_build_alloca(gallivm,
                                           atomic_bld->vec_type, "");

   LLVMValueRef exec_mask = mask_vec(bld_base);
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, exec_mask, uint_bld->zero, "");
   for (unsigned i = 0; i < uint_bld->type.length; i++) {
      LLVMValueRef counter = lp_build_const_int32(gallivm, i);
      LLVMValueRef loop_cond = LLVMBuildExtractElement(gallivm->builder, cond, counter, "");

      /* Skip this lane entirely if its exec-mask bit is clear. */
      struct lp_build_if_state exec_ifthen;
      lp_build_if(&exec_ifthen, gallivm, loop_cond);

      LLVMValueRef ssbo_limit;
      LLVMValueRef mem_ptr = mem_access_base_pointer(bld_base, atomic_bld, bit_size, payload, index,
                                                     counter, &ssbo_limit);

      LLVMValueRef loop_offset = LLVMBuildExtractElement(gallivm->builder, offset, counter, "");

      /* do_fetch starts all-ones and is ANDed with the bounds check. */
      LLVMValueRef do_fetch = lp_build_const_int32(gallivm, -1);
      if (ssbo_limit) {
         LLVMValueRef ssbo_oob_cmp = lp_build_compare(gallivm, lp_elem_type(uint_bld->type), PIPE_FUNC_LESS, loop_offset, ssbo_limit);
         do_fetch = LLVMBuildAnd(builder, do_fetch, ssbo_oob_cmp, "");
      }

      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, val,
                                                       counter, "");
      value_ptr = LLVMBuildBitCast(gallivm->builder, value_ptr, atomic_bld->elem_type, "");

      LLVMValueRef scalar_ptr = LLVMBuildGEP2(builder, atomic_bld->elem_type, mem_ptr, &loop_offset, 1, "");

      struct lp_build_if_state ifthen;
      LLVMValueRef inner_cond, temp_res;
      LLVMValueRef scalar;

      inner_cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, do_fetch, lp_build_const_int32(gallivm, 0), "");
      lp_build_if(&ifthen, gallivm, inner_cond);

      if (val2 != NULL) {
         /* Compare-and-swap: val is the comparator, val2 the new value. */
         LLVMValueRef cas_src_ptr = LLVMBuildExtractElement(gallivm->builder, val2,
                                                            counter, "");
         cas_src_ptr = LLVMBuildBitCast(gallivm->builder, cas_src_ptr, atomic_bld->elem_type, "");
         scalar = LLVMBuildAtomicCmpXchg(builder, scalar_ptr, value_ptr,
                                         cas_src_ptr,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         LLVMAtomicOrderingSequentiallyConsistent,
                                         false);
         /* cmpxchg yields {old value, success}; keep the old value. */
         scalar = LLVMBuildExtractValue(gallivm->builder, scalar, 0, "");
      } else {
         scalar = LLVMBuildAtomicRMW(builder, lp_translate_atomic_op(nir_op),
                                     scalar_ptr, value_ptr,
                                     LLVMAtomicOrderingSequentiallyConsistent,
                                     false);
      }
      /* Insert this lane's pre-op value into the result vector. */
      temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
      temp_res = LLVMBuildInsertElement(builder, temp_res, scalar, counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_else(&ifthen);
      /* Out-of-bounds lane: result is zero. */
      temp_res = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
      LLVMValueRef zero = lp_build_zero_bits(gallivm, bit_size, is_float);
      temp_res = LLVMBuildInsertElement(builder, temp_res, zero, counter, "");
      LLVMBuildStore(builder, temp_res, atom_res);
      lp_build_endif(&ifthen);

      lp_build_endif(&exec_ifthen);
   }
   *result = LLVMBuildLoad2(builder, atomic_bld->vec_type, atom_res, "");
}
1668
emit_barrier(struct lp_build_nir_context * bld_base)1669 static void emit_barrier(struct lp_build_nir_context *bld_base)
1670 {
1671 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1672 struct gallivm_state * gallivm = bld_base->base.gallivm;
1673
1674 LLVMBasicBlockRef resume = lp_build_insert_new_block(gallivm, "resume");
1675
1676 lp_build_coro_suspend_switch(gallivm, bld->coro, resume, false);
1677 LLVMPositionBuilderAtEnd(gallivm->builder, resume);
1678 }
1679
/* Query an SSBO's size and broadcast it across all lanes.  The base pointer
 * returned by ssbo_base_pointer() is discarded; only the size out-parameter
 * is used (queried with an 8-bit element view, so presumably in bytes —
 * verify against ssbo_base_pointer()).
 */
static LLVMValueRef emit_get_ssbo_size(struct lp_build_nir_context *bld_base,
                                       LLVMValueRef index)
{
   LLVMValueRef num_bytes;

   ssbo_base_pointer(bld_base, 8, index, first_active_invocation(bld_base), &num_bytes);

   return lp_build_broadcast_scalar(&bld_base->uint_bld, num_bytes);
}
1690
emit_image_op(struct lp_build_nir_context * bld_base,struct lp_img_params * params)1691 static void emit_image_op(struct lp_build_nir_context *bld_base,
1692 struct lp_img_params *params)
1693 {
1694 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1695 struct gallivm_state *gallivm = bld_base->base.gallivm;
1696
1697 params->type = bld_base->base.type;
1698 params->resources_type = bld->resources_type;
1699 params->resources_ptr = bld->resources_ptr;
1700 params->thread_data_type = bld->thread_data_type;
1701 params->thread_data_ptr = bld->thread_data_ptr;
1702 params->exec_mask = mask_vec(bld_base);
1703
1704 if (params->image_index_offset)
1705 params->image_index_offset = LLVMBuildExtractElement(gallivm->builder, params->image_index_offset,
1706 first_active_invocation(bld_base), "");
1707
1708 if (params->resource)
1709 params->resource = build_resource_to_scalar(bld_base, params->resource);
1710
1711 bld->image->emit_op(bld->image,
1712 bld->bld_base.base.gallivm,
1713 params);
1714
1715 }
1716
emit_image_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1717 static void emit_image_size(struct lp_build_nir_context *bld_base,
1718 struct lp_sampler_size_query_params *params)
1719 {
1720 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1721 struct gallivm_state *gallivm = bld_base->base.gallivm;
1722
1723 params->int_type = bld_base->int_bld.type;
1724 params->resources_type = bld->resources_type;
1725 params->resources_ptr = bld->resources_ptr;
1726 if (params->texture_unit_offset)
1727 params->texture_unit_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_unit_offset,
1728 first_active_invocation(bld_base), "");
1729 bld->image->emit_size_query(bld->image,
1730 bld->bld_base.base.gallivm,
1731 params);
1732
1733 }
1734
/* Ensure an output alloca exists for every (location, channel) slot that
 * 'var' covers, starting at component 'sc'.  No-op when there is no output
 * array (e.g. stages that don't collect outputs this way).
 */
static void init_var_slots(struct lp_build_nir_context *bld_base,
                           nir_variable *var, unsigned sc)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   const unsigned num_comps = glsl_count_attribute_slots(var->type, false) * 4;

   if (!bld->outputs)
      return;

   for (unsigned comp = sc; comp < num_comps + sc; comp++) {
      const unsigned loc = var->data.driver_location + comp / 4;
      const unsigned chan = comp % 4;

      if (!bld->outputs[loc][chan]) {
         bld->outputs[loc][chan] = lp_build_alloca(bld_base->base.gallivm,
                                                   bld_base->base.vec_type,
                                                   "output");
      }
   }
}
1752
/* Handle a NIR variable declaration.  Only shader outputs need work: their
 * per-channel allocas are created up front.  Other modes are ignored.
 */
static void emit_var_decl(struct lp_build_nir_context *bld_base,
                          nir_variable *var)
{
   unsigned start_comp = var->data.location_frac;

   if (var->data.mode != nir_var_shader_out)
      return;

   /* In fragment shaders, stencil and depth results live in fixed channels
    * of their output slot.
    */
   if (bld_base->shader->info.stage == MESA_SHADER_FRAGMENT) {
      if (var->data.location == FRAG_RESULT_STENCIL)
         start_comp = 1;
      else if (var->data.location == FRAG_RESULT_DEPTH)
         start_comp = 2;
   }

   init_var_slots(bld_base, var, start_comp);
}
1772
/* Dispatch a texture sample to the sampler backend.  The common path fills
 * in shared state and calls emit_tex_sample once; a dynamically indexed
 * texture in a non-fragment stage has to be scalarized per lane instead.
 */
static void emit_tex(struct lp_build_nir_context *bld_base,
                     struct lp_sampler_params *params)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;

   params->type = bld_base->base.type;
   params->resources_type = bld->resources_type;
   params->resources_ptr = bld->resources_ptr;
   params->thread_data_type = bld->thread_data_type;
   params->thread_data_ptr = bld->thread_data_ptr;
   params->exec_mask = mask_vec(bld_base);

   if (params->texture_index_offset && bld_base->shader->info.stage != MESA_SHADER_FRAGMENT) {
      /* this is horrible but this can be dynamic */
      /* Sample one lane at a time: extract each lane's coords/lod/index,
       * run a scalar-typed sample, and reassemble the texel vectors.
       */
      LLVMValueRef coords[5];
      LLVMValueRef *orig_texel_ptr;
      struct lp_build_context *uint_bld = &bld_base->uint_bld;
      LLVMValueRef result[4] = { LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type),
                                 LLVMGetUndef(bld_base->base.vec_type) };
      LLVMValueRef texel[4], orig_offset, orig_lod;
      unsigned i;
      /* Save the caller's fields that the per-lane loop overwrites. */
      orig_texel_ptr = params->texel;
      orig_lod = params->lod;
      for (i = 0; i < 5; i++) {
         coords[i] = params->coords[i];
      }
      orig_offset = params->texture_index_offset;

      for (unsigned v = 0; v < uint_bld->type.length; v++) {
         LLVMValueRef idx = lp_build_const_int32(gallivm, v);
         LLVMValueRef new_coords[5];
         for (i = 0; i < 5; i++) {
            new_coords[i] = LLVMBuildExtractElement(gallivm->builder,
                                                    coords[i], idx, "");
         }
         params->coords = new_coords;
         params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder,
                                                                orig_offset,
                                                                idx, "");
         /* Scalar sample: element type instead of the full SoA vector type. */
         params->type = lp_elem_type(bld_base->base.type);

         if (orig_lod)
            params->lod = LLVMBuildExtractElement(gallivm->builder, orig_lod, idx, "");
         params->texel = texel;
         bld->sampler->emit_tex_sample(bld->sampler,
                                       gallivm,
                                       params);

         /* Merge this lane's scalar texel back into the vector results. */
         for (i = 0; i < 4; i++) {
            result[i] = LLVMBuildInsertElement(gallivm->builder, result[i], texel[i], idx, "");
         }
      }
      for (i = 0; i < 4; i++) {
         orig_texel_ptr[i] = result[i];
      }
      return;
   }

   /* Dynamic index in a fragment shader: scalarize via first active lane. */
   if (params->texture_index_offset) {
      params->texture_index_offset = LLVMBuildExtractElement(gallivm->builder, params->texture_index_offset,
                                                             first_active_invocation(bld_base), "");
   }

   if (params->texture_resource)
      params->texture_resource = build_resource_to_scalar(bld_base, params->texture_resource);

   if (params->sampler_resource)
      params->sampler_resource = build_resource_to_scalar(bld_base, params->sampler_resource);

   params->type = bld_base->base.type;
   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 params);
}
1850
emit_tex_size(struct lp_build_nir_context * bld_base,struct lp_sampler_size_query_params * params)1851 static void emit_tex_size(struct lp_build_nir_context *bld_base,
1852 struct lp_sampler_size_query_params *params)
1853 {
1854 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
1855
1856 params->int_type = bld_base->int_bld.type;
1857 params->resources_type = bld->resources_type;
1858 params->resources_ptr = bld->resources_ptr;
1859 if (params->texture_unit_offset)
1860 params->texture_unit_offset = LLVMBuildExtractElement(bld_base->base.gallivm->builder,
1861 params->texture_unit_offset,
1862 lp_build_const_int32(bld_base->base.gallivm, 0), "");
1863
1864 params->exec_mask = mask_vec(bld_base);
1865 if (params->resource)
1866 params->resource = build_resource_to_scalar(bld_base, params->resource);
1867
1868 bld->sampler->emit_size_query(bld->sampler,
1869 bld->bld_base.base.gallivm,
1870 params);
1871 }
1872
get_local_invocation_index(struct lp_build_nir_soa_context * bld)1873 static LLVMValueRef get_local_invocation_index(struct lp_build_nir_soa_context *bld)
1874 {
1875 struct lp_build_nir_context *bld_base = &bld->bld_base;
1876 LLVMValueRef tmp, tmp2;
1877
1878 tmp = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[1]);
1879 tmp2 = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[0]);
1880 tmp = lp_build_mul(&bld_base->uint_bld, tmp, tmp2);
1881 tmp = lp_build_mul(&bld_base->uint_bld, tmp, bld->system_values.thread_id[2]);
1882
1883 tmp2 = lp_build_mul(&bld_base->uint_bld, tmp2, bld->system_values.thread_id[1]);
1884 tmp = lp_build_add(&bld_base->uint_bld, tmp, tmp2);
1885 tmp = lp_build_add(&bld_base->uint_bld, tmp, bld->system_values.thread_id[0]);
1886 return tmp;
1887 }
1888
/* Translate a NIR system-value load intrinsic into SoA vectors, reading the
 * pre-populated bld->system_values fields and broadcasting scalars across
 * the SIMD lanes as needed.  Components are written into result[];
 * unhandled intrinsics leave result[] untouched.
 */
static void emit_sysval_intrin(struct lp_build_nir_context *bld_base,
                               nir_intrinsic_instr *instr,
                               LLVMValueRef result[NIR_MAX_VEC_COMPONENTS])
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   /* Integer build context matching the destination's bit size, used by the
    * workgroup-id / num-workgroups broadcasts below.
    */
   struct lp_build_context *bld_broad = get_int_bld(bld_base, true, instr->def.bit_size);
   switch (instr->intrinsic) {
   case nir_intrinsic_load_instance_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id);
      break;
   case nir_intrinsic_load_base_instance:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.base_instance);
      break;
   case nir_intrinsic_load_base_vertex:
      /* Already a vector; no broadcast needed. */
      result[0] = bld->system_values.basevertex;
      break;
   case nir_intrinsic_load_first_vertex:
      result[0] = bld->system_values.firstvertex;
      break;
   case nir_intrinsic_load_vertex_id:
      result[0] = bld->system_values.vertex_id;
      break;
   case nir_intrinsic_load_primitive_id:
      result[0] = bld->system_values.prim_id;
      break;
   case nir_intrinsic_load_workgroup_id: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = bld->system_values.block_id[i];
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_local_invocation_id:
      for (unsigned i = 0; i < 3; i++)
         result[i] = bld->system_values.thread_id[i];
      break;
   case nir_intrinsic_load_local_invocation_index:
      result[0] = get_local_invocation_index(bld);
      break;
   case nir_intrinsic_load_num_workgroups: {
      LLVMValueRef tmp[3];
      for (unsigned i = 0; i < 3; i++) {
         tmp[i] = bld->system_values.grid_size[i];
         result[i] = lp_build_broadcast_scalar(bld_broad, tmp[i]);
      }
      break;
   }
   case nir_intrinsic_load_invocation_id:
      /* TCS keeps a per-lane invocation id; other stages get a broadcast. */
      if (bld_base->shader->info.stage == MESA_SHADER_TESS_CTRL)
         result[0] = bld->system_values.invocation_id;
      else
         result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.invocation_id);
      break;
   case nir_intrinsic_load_front_face:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.front_facing);
      break;
   case nir_intrinsic_load_draw_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.draw_id);
      break;
   default:
      /* Note: this default sits mid-switch (legal C); unhandled system
       * values fall here and leave result[] unset.
       */
      break;
   case nir_intrinsic_load_workgroup_size:
      for (unsigned i = 0; i < 3; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.block_size[i]);
      break;
   case nir_intrinsic_load_work_dim:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.work_dim);
      break;
   case nir_intrinsic_load_tess_coord:
      for (unsigned i = 0; i < 3; i++) {
         result[i] = LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_coord, i, "");
      }
      break;
   case nir_intrinsic_load_tess_level_outer:
      for (unsigned i = 0; i < 4; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_outer, i, ""));
      break;
   case nir_intrinsic_load_tess_level_inner:
      for (unsigned i = 0; i < 2; i++)
         result[i] = lp_build_broadcast_scalar(&bld_base->base, LLVMBuildExtractValue(gallivm->builder, bld->system_values.tess_inner, i, ""));
      break;
   case nir_intrinsic_load_patch_vertices_in:
      result[0] = bld->system_values.vertices_in;
      break;
   case nir_intrinsic_load_sample_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.sample_id);
      break;
   case nir_intrinsic_load_sample_pos:
      /* sample_pos is laid out as pairs: index = sample_id * 2 + component. */
      for (unsigned i = 0; i < 2; i++) {
         LLVMValueRef idx = LLVMBuildMul(gallivm->builder, bld->system_values.sample_id, lp_build_const_int32(gallivm, 2), "");
         idx = LLVMBuildAdd(gallivm->builder, idx, lp_build_const_int32(gallivm, i), "");
         LLVMValueRef val = lp_build_array_get2(gallivm, bld->system_values.sample_pos_type,
                                                bld->system_values.sample_pos, idx);
         result[i] = lp_build_broadcast_scalar(&bld_base->base, val);
      }
      break;
   case nir_intrinsic_load_sample_mask_in:
      result[0] = bld->system_values.sample_mask_in;
      break;
   case nir_intrinsic_load_view_index:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.view_index);
      break;
   case nir_intrinsic_load_subgroup_invocation: {
      /* Constant vector <0, 1, 2, ...> — each lane's position. */
      LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
      for(unsigned i = 0; i < bld->bld_base.base.type.length; ++i)
         elems[i] = lp_build_const_int32(gallivm, i);
      result[0] = LLVMConstVector(elems, bld->bld_base.base.type.length);
      break;
   }
   case nir_intrinsic_load_subgroup_id:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.subgroup_id);
      break;
   case nir_intrinsic_load_num_subgroups:
      result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.num_subgroups);
      break;
   }
}
2008
/* A helper invocation is a lane whose exec-mask lane is not fully set;
 * produce a per-lane boolean vector of that condition.
 */
static void emit_helper_invocation(struct lp_build_nir_context *bld_base,
                                   LLVMValueRef *dst)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   LLVMValueRef all_ones = lp_build_const_int_vec(gallivm, uint_bld->type, -1);

   *dst = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, mask_vec(bld_base), all_ones);
}
2016
bgnloop(struct lp_build_nir_context * bld_base)2017 static void bgnloop(struct lp_build_nir_context *bld_base)
2018 {
2019 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2020 lp_exec_bgnloop(&bld->exec_mask, true);
2021 }
2022
endloop(struct lp_build_nir_context * bld_base)2023 static void endloop(struct lp_build_nir_context *bld_base)
2024 {
2025 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2026 lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask, bld->mask);
2027 }
2028
/* Open a scalar "any lane active?" branch so a whole masked region can be
 * skipped when no invocation would execute it.  No-op for flattened
 * branches.  Pushes onto bld_base->if_stack; paired with
 * lp_build_skip_branch_end().
 */
static void lp_build_skip_branch(struct lp_build_nir_context *bld_base, bool flatten)
{
   if (flatten)
      return;

   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   /* Reduce the per-lane exec mask to a single integer: compare each lane
    * against zero, pack the i1 lanes into one length-bit integer, then
    * widen it so it can be compared against 0.
    */
   LLVMValueRef lanes_live =
      LLVMBuildICmp(builder, LLVMIntNE, mask_vec(bld_base), bld_base->uint_bld.zero, "");
   LLVMValueRef packed =
      LLVMBuildBitCast(builder, lanes_live,
                       LLVMIntTypeInContext(gallivm->context,
                                            bld_base->uint_bld.type.length), "");
   packed = LLVMBuildZExt(builder, packed, bld_base->int_bld.elem_type, "");

   LLVMValueRef any_active =
      LLVMBuildICmp(builder, LLVMIntNE, packed,
                    lp_build_const_int32(gallivm, 0), "any_active");

   assert(bld_base->if_stack_size < LP_MAX_TGSI_NESTING);
   lp_build_if(&bld_base->if_stack[bld_base->if_stack_size++], gallivm, any_active);
}
2049
/* Close the "any lane active" branch opened by lp_build_skip_branch(). */
static void lp_build_skip_branch_end(struct lp_build_nir_context *bld_base, bool flatten)
{
   if (flatten)
      return;

   assert(bld_base->if_stack_size);
   lp_build_endif(&bld_base->if_stack[--bld_base->if_stack_size]);
}
2059
/* Begin an 'if': push the condition onto the exec-mask stack, then open a
 * scalar skip-branch unless the branch is flattened.
 */
static void if_cond(struct lp_build_nir_context *bld_base, LLVMValueRef cond, bool flatten)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;
   LLVMValueRef cond_mask =
      LLVMBuildBitCast(builder, cond, bld_base->base.int_vec_type, "");

   lp_exec_mask_cond_push(&soa->exec_mask, cond_mask);
   lp_build_skip_branch(bld_base, flatten);
}
2070
/* Handle 'else': close the then-side skip branch, invert the condition on
 * the exec-mask stack, and open a skip branch for the else side.
 */
static void else_stmt(struct lp_build_nir_context *bld_base, bool flatten_then, bool flatten_else)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;

   lp_build_skip_branch_end(bld_base, flatten_then);
   lp_exec_mask_cond_invert(&soa->exec_mask);
   lp_build_skip_branch(bld_base, flatten_else);
}
2080
/* Handle 'endif': close the open skip branch and pop the condition from
 * the exec-mask stack.
 */
static void endif_stmt(struct lp_build_nir_context *bld_base, bool flatten)
{
   struct lp_build_nir_soa_context *soa = (struct lp_build_nir_soa_context *)bld_base;

   lp_build_skip_branch_end(bld_base, flatten);
   lp_exec_mask_cond_pop(&soa->exec_mask);
}
2088
break_stmt(struct lp_build_nir_context * bld_base)2089 static void break_stmt(struct lp_build_nir_context *bld_base)
2090 {
2091 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2092
2093 lp_exec_break(&bld->exec_mask, NULL, false);
2094 }
2095
continue_stmt(struct lp_build_nir_context * bld_base)2096 static void continue_stmt(struct lp_build_nir_context *bld_base)
2097 {
2098 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2099 lp_exec_continue(&bld->exec_mask);
2100 }
2101
/* Fragment discard (kill).  Builds a "keep" mask — lanes that survive —
 * and folds it into the overall fragment mask.  With no condition, all
 * currently-active lanes are killed; with a condition, only the lanes
 * where it holds.  Inactive lanes must always survive.
 */
static void discard(struct lp_build_nir_context *bld_base, LLVMValueRef cond)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   LLVMValueRef keep;

   if (cond) {
      /* Survivors are the lanes where the condition is false... */
      keep = LLVMBuildNot(builder, cond, "");
      if (bld->exec_mask.has_mask) {
         /* ...plus every lane that is currently inactive. */
         LLVMValueRef inactive = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp");
         keep = LLVMBuildOr(builder, keep, inactive, "");
      }
   } else {
      /* Unconditional discard: kill exactly the active lanes (all lanes
       * when there is no exec mask).
       */
      keep = bld->exec_mask.has_mask
                ? LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp")
                : LLVMConstNull(bld->bld_base.base.int_vec_type);
   }

   lp_build_mask_update(bld->mask, keep);
}
2124
2125 static void
increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2126 increment_vec_ptr_by_mask(struct lp_build_nir_context * bld_base,
2127 LLVMValueRef ptr,
2128 LLVMValueRef mask)
2129 {
2130 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2131 LLVMValueRef current_vec = LLVMBuildLoad2(builder, LLVMTypeOf(mask), ptr, "");
2132
2133 current_vec = LLVMBuildSub(builder, current_vec, mask, "");
2134
2135 LLVMBuildStore(builder, current_vec, ptr);
2136 }
2137
2138 static void
clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,LLVMValueRef ptr,LLVMValueRef mask)2139 clear_uint_vec_ptr_from_mask(struct lp_build_nir_context * bld_base,
2140 LLVMValueRef ptr,
2141 LLVMValueRef mask)
2142 {
2143 LLVMBuilderRef builder = bld_base->base.gallivm->builder;
2144 LLVMValueRef current_vec = LLVMBuildLoad2(builder, bld_base->uint_bld.vec_type, ptr, "");
2145
2146 current_vec = lp_build_select(&bld_base->uint_bld,
2147 mask,
2148 bld_base->uint_bld.zero,
2149 current_vec);
2150
2151 LLVMBuildStore(builder, current_vec, ptr);
2152 }
2153
2154 static LLVMValueRef
clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,LLVMValueRef current_mask_vec,LLVMValueRef total_emitted_vertices_vec)2155 clamp_mask_to_max_output_vertices(struct lp_build_nir_soa_context * bld,
2156 LLVMValueRef current_mask_vec,
2157 LLVMValueRef total_emitted_vertices_vec)
2158 {
2159 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2160 struct lp_build_context *int_bld = &bld->bld_base.int_bld;
2161 LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS,
2162 total_emitted_vertices_vec,
2163 bld->max_output_vertices_vec);
2164
2165 return LLVMBuildAnd(builder, current_mask_vec, max_mask, "");
2166 }
2167
/* Geometry shader EmitVertex() for a given stream: forward the current
 * outputs to the GS interface for all active lanes that haven't hit the
 * max-vertex limit, then bump the per-stream vertex counters.
 */
static void emit_vertex(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   /* Writes to streams the shader doesn't use are dropped. */
   if (stream_id >= bld->gs_vertex_streams)
      return;

   assert(bld->gs_iface->emit_vertex);

   LLVMValueRef total_emitted =
      LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type,
                     bld->total_emitted_vertices_vec_ptr[stream_id], "");
   /* Never emit past the declared max_vertices. */
   LLVMValueRef mask =
      clamp_mask_to_max_output_vertices(bld, mask_vec(bld_base), total_emitted);

   bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base.base,
                              bld->outputs,
                              total_emitted,
                              mask,
                              lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                                     bld->bld_base.base.type,
                                                     stream_id));

   /* Bump both the current-primitive and the total vertex counters. */
   increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id], mask);
   increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr[stream_id], mask);
}
2192
2193 static void
end_primitive_masked(struct lp_build_nir_context * bld_base,LLVMValueRef mask,uint32_t stream_id)2194 end_primitive_masked(struct lp_build_nir_context * bld_base,
2195 LLVMValueRef mask, uint32_t stream_id)
2196 {
2197 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2198 LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
2199
2200 if (stream_id >= bld->gs_vertex_streams)
2201 return;
2202 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2203 LLVMValueRef emitted_vertices_vec =
2204 LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_vertices_vec_ptr[stream_id], "");
2205 LLVMValueRef emitted_prims_vec =
2206 LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->emitted_prims_vec_ptr[stream_id], "");
2207 LLVMValueRef total_emitted_vertices_vec =
2208 LLVMBuildLoad2(builder, bld->bld_base.uint_bld.vec_type, bld->total_emitted_vertices_vec_ptr[stream_id], "");
2209
2210 LLVMValueRef emitted_mask = lp_build_cmp(uint_bld,
2211 PIPE_FUNC_NOTEQUAL,
2212 emitted_vertices_vec,
2213 uint_bld->zero);
2214 mask = LLVMBuildAnd(builder, mask, emitted_mask, "");
2215 bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base.base,
2216 total_emitted_vertices_vec,
2217 emitted_vertices_vec, emitted_prims_vec, mask, stream_id);
2218 increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr[stream_id],
2219 mask);
2220 clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr[stream_id],
2221 mask);
2222 }
2223
/*
 * NIR EndPrimitive/EndStreamPrimitive handler: close the current primitive
 * for every lane that is active in the execution mask.
 */
static void end_primitive(struct lp_build_nir_context *bld_base, uint32_t stream_id)
{
   ASSERTED struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;

   assert(bld->gs_iface->end_primitive);

   end_primitive_masked(bld_base, mask_vec(bld_base), stream_id);
}
2233
2234 static void
emit_prologue(struct lp_build_nir_soa_context * bld)2235 emit_prologue(struct lp_build_nir_soa_context *bld)
2236 {
2237 struct gallivm_state * gallivm = bld->bld_base.base.gallivm;
2238 if (bld->indirects & nir_var_shader_in && !bld->gs_iface && !bld->tcs_iface && !bld->tes_iface) {
2239 uint32_t num_inputs = bld->num_inputs;
2240 /* If this is an indirect case, the number of inputs should not be 0 */
2241 assert(num_inputs > 0);
2242
2243 unsigned index, chan;
2244 LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
2245 LLVMValueRef array_size = lp_build_const_int32(gallivm, num_inputs * 4);
2246 bld->inputs_array = lp_build_array_alloca(gallivm,
2247 vec_type, array_size,
2248 "input_array");
2249
2250 for (index = 0; index < num_inputs; ++index) {
2251 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
2252 LLVMValueRef lindex =
2253 lp_build_const_int32(gallivm, index * 4 + chan);
2254 LLVMValueRef input_ptr =
2255 LLVMBuildGEP2(gallivm->builder, vec_type, bld->inputs_array, &lindex, 1, "");
2256 LLVMValueRef value = bld->inputs[index][chan];
2257 if (value)
2258 LLVMBuildStore(gallivm->builder, value, input_ptr);
2259 }
2260 }
2261 }
2262 }
2263
/*
 * Implement NIR vote_any / vote_all / vote_ieq / vote_feq.
 *
 * Only lanes enabled in the execution mask may participate, so the vote is
 * folded with a jit-time scalar loop over the vector instead of an LLVM
 * vector reduction.  The (uniform) outcome is broadcast to every lane of
 * result[0] as a 0 / ~0 boolean vector.
 */
static void emit_vote(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                      nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* Per-lane "is this lane active" condition. */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   /* res_store accumulates the vote outcome; eq_store holds the reference
    * value for the equality votes. */
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->uint_bld.elem_type, "");
   LLVMValueRef eq_store = lp_build_alloca(gallivm, get_int_bld(bld_base, true, bit_size)->elem_type, "");
   LLVMValueRef init_val = NULL;
   if (instr->intrinsic == nir_intrinsic_vote_ieq ||
       instr->intrinsic == nir_intrinsic_vote_feq) {
      /* for equal we unfortunately have to loop and find the first valid one. */
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
      LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

      struct lp_build_if_state ifthen;
      lp_build_if(&ifthen, gallivm, if_cond);
      LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                       loop_state.counter, "");
      /* Every active lane overwrites the reference; the last one wins, which
       * is fine since equality is what is being tested. */
      LLVMBuildStore(builder, value_ptr, eq_store);
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, -1), res_store);
      lp_build_endif(&ifthen);
      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);
      init_val = LLVMBuildLoad2(builder, get_int_bld(bld_base, true, bit_size)->elem_type, eq_store, "");
   } else {
      /* vote_any starts false (0); vote_all starts true (~0). */
      LLVMBuildStore(builder, lp_build_const_int32(gallivm, instr->intrinsic == nir_intrinsic_vote_any ? 0 : -1), res_store);
   }

   /* Main loop: fold each active lane's value into the accumulator. */
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond;
   if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");

   lp_build_if(&ifthen, gallivm, if_cond);
   res = LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, "");

   if (instr->intrinsic == nir_intrinsic_vote_feq) {
      struct lp_build_context *flt_bld = get_flt_bld(bld_base, bit_size);
      /* Unordered equality (UEQ): a comparison involving NaN counts as
       * equal.  Values are stored as integers, hence the bitcasts. */
      LLVMValueRef tmp = LLVMBuildFCmp(builder, LLVMRealUEQ,
                                       LLVMBuildBitCast(builder, init_val, flt_bld->elem_type, ""),
                                       LLVMBuildBitCast(builder, value_ptr, flt_bld->elem_type, ""), "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_ieq) {
      LLVMValueRef tmp = LLVMBuildICmp(builder, LLVMIntEQ, init_val, value_ptr, "");
      tmp = LLVMBuildSExt(builder, tmp, bld_base->uint_bld.elem_type, "");
      res = LLVMBuildAnd(builder, res, tmp, "");
   } else if (instr->intrinsic == nir_intrinsic_vote_any)
      res = LLVMBuildOr(builder, res, value_ptr, "");
   else
      res = LLVMBuildAnd(builder, res, value_ptr, "");
   LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
                                         LLVMBuildLoad2(builder, bld_base->uint_bld.elem_type, res_store, ""));
}
2330
/*
 * Implement NIR ballot: build a scalar bitmask with bit i set iff lane i is
 * active in the execution mask and its source value is true, and broadcast
 * that mask into every lane of result[0].
 */
static void emit_ballot(struct lp_build_nir_context *bld_base, LLVMValueRef src, nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state * gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   /* Inactive lanes must not contribute: mask them off up front. */
   src = LLVMBuildAnd(builder, src, exec_mask, "");
   LLVMValueRef res_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef res;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, src,
                                                    loop_state.counter, "");
   res = LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, "");
   /* Lane values are 0 or ~0, so ANDing with (1 << lane) isolates this
    * lane's bit before OR-ing it into the accumulator. */
   res = LLVMBuildOr(builder,
                     res,
                     LLVMBuildAnd(builder, value_ptr, LLVMBuildShl(builder, lp_build_const_int32(gallivm, 1), loop_state.counter, ""), ""), "");
   LLVMBuildStore(builder, res, res_store);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   result[0] = lp_build_broadcast_scalar(&bld_base->uint_bld,
                                         LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, res_store, ""));
}
2354
/*
 * Implement NIR elect: result[0] is a boolean vector with ~0 in exactly one
 * lane -- the first lane active in the execution mask -- and 0 elsewhere.
 * The search is a jit-time loop guarded by a "found" flag so that only the
 * first active lane records its index.
 */
static void emit_elect(struct lp_build_nir_context *bld_base, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;

   /* idx_store: index of the elected lane; found_store: search-done flag. */
   LLVMValueRef idx_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   LLVMValueRef found_store = lp_build_alloca(gallivm, bld_base->int_bld.elem_type, "");
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   LLVMValueRef value_ptr = LLVMBuildExtractElement(gallivm->builder, exec_mask,
                                                    loop_state.counter, "");
   /* cond: this lane is active (mask element is ~0)... */
   LLVMValueRef cond = LLVMBuildICmp(gallivm->builder,
                                     LLVMIntEQ,
                                     value_ptr,
                                     lp_build_const_int32(gallivm, -1), "");
   /* ...cond2: and no earlier lane has been elected yet. */
   LLVMValueRef cond2 = LLVMBuildICmp(gallivm->builder,
                                      LLVMIntEQ,
                                      LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, found_store, ""),
                                      lp_build_const_int32(gallivm, 0), "");

   cond = LLVMBuildAnd(builder, cond, cond2, "");
   struct lp_build_if_state ifthen;
   lp_build_if(&ifthen, gallivm, cond);
   LLVMBuildStore(builder, lp_build_const_int32(gallivm, 1), found_store);
   LLVMBuildStore(builder, loop_state.counter, idx_store);
   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);

   /* Place ~0 at the elected lane of an otherwise all-zero vector. */
   result[0] = LLVMBuildInsertElement(builder, bld_base->uint_bld.zero,
                                      lp_build_const_int32(gallivm, -1),
                                      LLVMBuildLoad2(builder, bld_base->int_bld.elem_type, idx_store, ""),
                                      "");
}
2390
2391 #if LLVM_VERSION_MAJOR >= 10
/*
 * Implement NIR shuffle: result lane i = src[index[i]].
 *
 * On AVX2 with an 8-wide 32-bit vector this maps directly onto the
 * llvm.x86.avx2.permd intrinsic; otherwise a jit-time extract/insert loop
 * is emitted.
 */
static void emit_shuffle(struct lp_build_nir_context *bld_base, LLVMValueRef src, LLVMValueRef index,
                         nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   assert(instr->intrinsic == nir_intrinsic_shuffle);

   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   uint32_t index_bit_size = nir_src_bit_size(instr->src[1]);
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   if (util_get_cpu_caps()->has_avx2 && bit_size == 32 && index_bit_size == 32 && int_bld->type.length == 8) {
      /* freeze `src` in case inactive invocations contain poison */
      src = LLVMBuildFreeze(builder, src, "");
      result[0] = lp_build_intrinsic_binary(builder, "llvm.x86.avx2.permd", int_bld->vec_type, src, index);
   } else {
      LLVMValueRef res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");
      struct lp_build_loop_state loop_state;
      lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

      LLVMValueRef index_value = LLVMBuildExtractElement(builder, index, loop_state.counter, "");

      LLVMValueRef src_value = LLVMBuildExtractElement(builder, src, index_value, "");
      /* freeze `src_value` in case an out-of-bounds index or an index into an
       * inactive invocation results in poison
       */
      src_value = LLVMBuildFreeze(builder, src_value, "");

      LLVMValueRef res = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
      res = LLVMBuildInsertElement(builder, res, src_value, loop_state.counter, "");
      LLVMBuildStore(builder, res, res_store);

      lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                             NULL, LLVMIntUGE);

      result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
   }
}
2430 #endif
2431
/*
 * Implement NIR reduce / inclusive_scan / exclusive_scan subgroup ops.
 *
 * Inactive lanes (per the execution mask) must be skipped, so this emits a
 * jit-time scalar loop instead of an LLVM vector reduction: scan_store
 * carries the running value (seeded with the operation's identity) and, for
 * the scan variants, res_store accumulates the per-lane results.  Values are
 * kept bitcast to integers while in storage; float operations bitcast back
 * and forth around the actual arithmetic.
 */
static void emit_reduce(struct lp_build_nir_context *bld_base, LLVMValueRef src,
                        nir_intrinsic_instr *instr, LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   uint32_t bit_size = nir_src_bit_size(instr->src[0]);
   /* can't use llvm reduction intrinsics because of exec_mask */
   LLVMValueRef exec_mask = mask_vec(bld_base);
   struct lp_build_loop_state loop_state;
   nir_op reduction_op = nir_intrinsic_reduction_op(instr);

   LLVMValueRef res_store = NULL;
   LLVMValueRef scan_store;
   struct lp_build_context *int_bld = get_int_bld(bld_base, true, bit_size);

   /* Scans additionally need a per-lane result vector. */
   if (instr->intrinsic != nir_intrinsic_reduce)
      res_store = lp_build_alloca(gallivm, int_bld->vec_type, "");

   scan_store = lp_build_alloca(gallivm, int_bld->elem_type, "");

   struct lp_build_context elem_bld;
   bool is_flt = reduction_op == nir_op_fadd ||
      reduction_op == nir_op_fmul ||
      reduction_op == nir_op_fmin ||
      reduction_op == nir_op_fmax;
   bool is_unsigned = reduction_op == nir_op_umin ||
      reduction_op == nir_op_umax;

   struct lp_build_context *vec_bld = is_flt ? get_flt_bld(bld_base, bit_size) :
      get_int_bld(bld_base, is_unsigned, bit_size);

   /* elem_bld performs the scalar arithmetic in the operation's own type. */
   lp_build_context_init(&elem_bld, gallivm, lp_elem_type(vec_bld->type));

   LLVMValueRef store_val = NULL;
   /*
    * Put the identity value for the operation into the storage
    */
   switch (reduction_op) {
   case nir_op_fmin: {
      LLVMValueRef flt_max = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), INFINITY) : lp_build_const_float(gallivm, INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_max, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmax: {
      LLVMValueRef flt_min = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), -INFINITY) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), -INFINITY) : lp_build_const_float(gallivm, -INFINITY));
      store_val = LLVMBuildBitCast(builder, flt_min, int_bld->elem_type, "");
      break;
   }
   case nir_op_fmul: {
      LLVMValueRef flt_one = bit_size == 64 ? LLVMConstReal(LLVMDoubleTypeInContext(gallivm->context), 1.0) :
         (bit_size == 16 ? LLVMConstReal(LLVMHalfTypeInContext(gallivm->context), 1.0) : lp_build_const_float(gallivm, 1.0));
      store_val = LLVMBuildBitCast(builder, flt_one, int_bld->elem_type, "");
      break;
   }
   case nir_op_umin:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), UINT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), UINT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, UINT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, UINT64_MAX);
         break;
      }
      break;
   case nir_op_imin:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MAX, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MAX, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MAX);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MAX);
         break;
      }
      break;
   case nir_op_imax:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), INT8_MIN, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), INT16_MIN, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, INT_MIN);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, INT64_MIN);
         break;
      }
      break;
   case nir_op_imul:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 1, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 1, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 1);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 1);
         break;
      }
      break;
   case nir_op_iand:
      switch (bit_size) {
      case 8:
         store_val = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), 0xff, 0);
         break;
      case 16:
         store_val = LLVMConstInt(LLVMInt16TypeInContext(gallivm->context), 0xffff, 0);
         break;
      case 32:
      default:
         store_val = lp_build_const_int32(gallivm, 0xffffffff);
         break;
      case 64:
         store_val = lp_build_const_int64(gallivm, 0xffffffffffffffffLL);
         break;
      }
      break;
   default:
      /* iadd, ior, ixor: identity is 0, which the alloca already holds. */
      break;
   }
   if (store_val)
      LLVMBuildStore(builder, store_val, scan_store);

   /* Per-lane activity condition derived from the execution mask. */
   LLVMValueRef outer_cond = LLVMBuildICmp(builder, LLVMIntNE, exec_mask, bld_base->uint_bld.zero, "");

   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));

   struct lp_build_if_state ifthen;
   LLVMValueRef if_cond = LLVMBuildExtractElement(gallivm->builder, outer_cond, loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, if_cond);
   LLVMValueRef value = LLVMBuildExtractElement(gallivm->builder, src, loop_state.counter, "");

   LLVMValueRef res = NULL;
   LLVMValueRef scan_val = LLVMBuildLoad2(gallivm->builder, int_bld->elem_type, scan_store, "");
   if (instr->intrinsic != nir_intrinsic_reduce)
      res = LLVMBuildLoad2(gallivm->builder, int_bld->vec_type, res_store, "");

   /* Exclusive scan records the running value *before* this lane is folded in. */
   if (instr->intrinsic == nir_intrinsic_exclusive_scan)
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");

   /* Storage is integer-typed; switch to the float view for float ops. */
   if (is_flt) {
      scan_val = LLVMBuildBitCast(builder, scan_val, elem_bld.elem_type, "");
      value = LLVMBuildBitCast(builder, value, elem_bld.elem_type, "");
   }
   switch (reduction_op) {
   case nir_op_fadd:
   case nir_op_iadd:
      scan_val = lp_build_add(&elem_bld, value, scan_val);
      break;
   case nir_op_fmul:
   case nir_op_imul:
      scan_val = lp_build_mul(&elem_bld, value, scan_val);
      break;
   case nir_op_imin:
   case nir_op_umin:
   case nir_op_fmin:
      scan_val = lp_build_min(&elem_bld, value, scan_val);
      break;
   case nir_op_imax:
   case nir_op_umax:
   case nir_op_fmax:
      scan_val = lp_build_max(&elem_bld, value, scan_val);
      break;
   case nir_op_iand:
      scan_val = lp_build_and(&elem_bld, value, scan_val);
      break;
   case nir_op_ior:
      scan_val = lp_build_or(&elem_bld, value, scan_val);
      break;
   case nir_op_ixor:
      scan_val = lp_build_xor(&elem_bld, value, scan_val);
      break;
   default:
      assert(0);
      break;
   }
   if (is_flt)
      scan_val = LLVMBuildBitCast(builder, scan_val, int_bld->elem_type, "");
   LLVMBuildStore(builder, scan_val, scan_store);

   /* Inclusive scan records the running value *after* this lane is folded in. */
   if (instr->intrinsic == nir_intrinsic_inclusive_scan) {
      res = LLVMBuildInsertElement(builder, res, scan_val, loop_state.counter, "");
   }

   if (instr->intrinsic != nir_intrinsic_reduce)
      LLVMBuildStore(builder, res, res_store);
   lp_build_endif(&ifthen);

   lp_build_loop_end_cond(&loop_state, lp_build_const_int32(gallivm, bld_base->uint_bld.type.length),
                          NULL, LLVMIntUGE);
   /* reduce: broadcast the final scalar; scans: return the per-lane vector. */
   if (instr->intrinsic == nir_intrinsic_reduce)
      result[0] = lp_build_broadcast_scalar(int_bld, LLVMBuildLoad2(builder, int_bld->elem_type, scan_store, ""));
   else
      result[0] = LLVMBuildLoad2(builder, int_bld->vec_type, res_store, "");
}
2651
/*
 * Handle readInvocation()/readFirstInvocation(): pick one scalar lane out of
 * `src` and broadcast it across all lanes of result[0].
 *
 * invoc is NULL for readFirstInvocation(); otherwise it carries the per-lane
 * requested invocation index.
 */
static void emit_read_invocation(struct lp_build_nir_context *bld_base,
                                 LLVMValueRef src,
                                 unsigned bit_size,
                                 LLVMValueRef invoc,
                                 LLVMValueRef result[4])
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   struct lp_build_context *bcast_bld = get_int_bld(bld_base, true, bit_size);
   LLVMValueRef lane = first_active_invocation(bld_base);

   /* readInvocation(): use the first active channel to pull the requested
    * invocation index out of the invocation arg. */
   if (invoc)
      lane = LLVMBuildExtractElement(gallivm->builder, invoc, lane, "");

   LLVMValueRef picked = LLVMBuildExtractElement(gallivm->builder, src, lane, "");
   result[0] = lp_build_broadcast_scalar(bcast_bld, picked);
}
2673
2674 static void
emit_interp_at(struct lp_build_nir_context * bld_base,unsigned num_components,nir_variable * var,bool centroid,bool sample,unsigned const_index,LLVMValueRef indir_index,LLVMValueRef offsets[2],LLVMValueRef dst[4])2675 emit_interp_at(struct lp_build_nir_context *bld_base,
2676 unsigned num_components,
2677 nir_variable *var,
2678 bool centroid,
2679 bool sample,
2680 unsigned const_index,
2681 LLVMValueRef indir_index,
2682 LLVMValueRef offsets[2],
2683 LLVMValueRef dst[4])
2684 {
2685 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2686
2687 for (unsigned i = 0; i < num_components; i++) {
2688 dst[i] = bld->fs_iface->interp_fn(bld->fs_iface, &bld_base->base,
2689 const_index + var->data.driver_location, i + var->data.location_frac,
2690 centroid, sample, indir_index, offsets);
2691 }
2692 }
2693
2694 static void
emit_set_vertex_and_primitive_count(struct lp_build_nir_context * bld_base,LLVMValueRef vert_count,LLVMValueRef prim_count)2695 emit_set_vertex_and_primitive_count(struct lp_build_nir_context *bld_base,
2696 LLVMValueRef vert_count,
2697 LLVMValueRef prim_count)
2698 {
2699 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2700 struct gallivm_state *gallivm = bld_base->base.gallivm;
2701 assert(bld->mesh_iface);
2702 LLVMValueRef idx = first_active_invocation(bld_base);
2703
2704 LLVMValueRef vcount = LLVMBuildExtractElement(gallivm->builder,
2705 vert_count, idx, "");
2706 LLVMValueRef pcount = LLVMBuildExtractElement(gallivm->builder,
2707 prim_count, idx, "");
2708
2709 bld->mesh_iface->emit_vertex_and_primitive_count(bld->mesh_iface, &bld_base->base, vcount, pcount);
2710 }
2711
2712 static void
emit_launch_mesh_workgroups(struct lp_build_nir_context * bld_base,LLVMValueRef launch_grid)2713 emit_launch_mesh_workgroups(struct lp_build_nir_context *bld_base,
2714 LLVMValueRef launch_grid)
2715 {
2716 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2717 struct gallivm_state *gallivm = bld_base->base.gallivm;
2718 LLVMTypeRef vec_type = LLVMArrayType(LLVMInt32TypeInContext(gallivm->context), 3);
2719
2720 LLVMValueRef local_invoc_idx = get_local_invocation_index(bld);
2721
2722 vec_type = LLVMPointerType(vec_type, 0);
2723
2724 local_invoc_idx = LLVMBuildExtractElement(gallivm->builder, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2725 LLVMValueRef if_cond = LLVMBuildICmp(gallivm->builder, LLVMIntEQ, local_invoc_idx, lp_build_const_int32(gallivm, 0), "");
2726 struct lp_build_if_state ifthen;
2727 lp_build_if(&ifthen, gallivm, if_cond);
2728 LLVMValueRef ptr = bld->payload_ptr;
2729 ptr = LLVMBuildPtrToInt(gallivm->builder, ptr, bld_base->int64_bld.elem_type, "");
2730 for (unsigned i = 0; i < 3; i++) {
2731 LLVMValueRef lg = LLVMBuildExtractValue(gallivm->builder, launch_grid, i, "");
2732 lg = LLVMBuildExtractElement(gallivm->builder, lg, lp_build_const_int32(gallivm, 0), "");
2733 LLVMValueRef this_ptr = LLVMBuildIntToPtr(gallivm->builder, ptr, LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0), "");
2734 LLVMBuildStore(gallivm->builder, lg, this_ptr);
2735 ptr = LLVMBuildAdd(gallivm->builder, ptr, lp_build_const_int64(gallivm, 4), "");
2736 }
2737 lp_build_endif(&ifthen);
2738 }
2739
2740 static void
emit_call(struct lp_build_nir_context * bld_base,struct lp_build_fn * fn,int num_args,LLVMValueRef * args)2741 emit_call(struct lp_build_nir_context *bld_base,
2742 struct lp_build_fn *fn,
2743 int num_args,
2744 LLVMValueRef *args)
2745 {
2746 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2747
2748 args[0] = mask_vec(bld_base);
2749 args[1] = bld->call_context_ptr;
2750 LLVMBuildCall2(bld_base->base.gallivm->builder, fn->fn_type, fn->fn, args, num_args, "");
2751 }
2752
get_scratch_thread_offsets(struct gallivm_state * gallivm,struct lp_type type,unsigned scratch_size)2753 static LLVMValueRef get_scratch_thread_offsets(struct gallivm_state *gallivm,
2754 struct lp_type type,
2755 unsigned scratch_size)
2756 {
2757 LLVMTypeRef elem_type = lp_build_int_elem_type(gallivm, type);
2758 LLVMValueRef elems[LP_MAX_VECTOR_LENGTH];
2759 unsigned i;
2760
2761 if (type.length == 1)
2762 return LLVMConstInt(elem_type, 0, 0);
2763
2764 for (i = 0; i < type.length; ++i)
2765 elems[i] = LLVMConstInt(elem_type, scratch_size * i, 0);
2766
2767 return LLVMConstVector(elems, type.length);
2768 }
2769
2770 static void
emit_load_scratch(struct lp_build_nir_context * bld_base,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])2771 emit_load_scratch(struct lp_build_nir_context *bld_base,
2772 unsigned nc, unsigned bit_size,
2773 LLVMValueRef offset,
2774 LLVMValueRef outval[NIR_MAX_VEC_COMPONENTS])
2775 {
2776 struct gallivm_state * gallivm = bld_base->base.gallivm;
2777 LLVMBuilderRef builder = gallivm->builder;
2778 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2779 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2780 struct lp_build_context *load_bld;
2781 LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2782 LLVMValueRef exec_mask = mask_vec(bld_base);
2783 LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2784 LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2785 bld->scratch_ptr);
2786 load_bld = get_int_bld(bld_base, true, bit_size);
2787
2788 offset = lp_build_add(uint_bld, offset, thread_offsets);
2789
2790 for (unsigned c = 0; c < nc; c++) {
2791 LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2792
2793 outval[c] = lp_build_masked_gather(gallivm, load_bld->type.length, bit_size,
2794 load_bld->vec_type,
2795 lp_vec_add_offset_ptr(bld_base, bit_size,
2796 scratch_ptr_vec,
2797 chan_offset),
2798 exec_mask);
2799 outval[c] = LLVMBuildBitCast(builder, outval[c], load_bld->vec_type, "");
2800 }
2801 }
2802
2803 static void
emit_store_scratch(struct lp_build_nir_context * bld_base,unsigned writemask,unsigned nc,unsigned bit_size,LLVMValueRef offset,LLVMValueRef dst)2804 emit_store_scratch(struct lp_build_nir_context *bld_base,
2805 unsigned writemask, unsigned nc,
2806 unsigned bit_size, LLVMValueRef offset,
2807 LLVMValueRef dst)
2808 {
2809 struct gallivm_state * gallivm = bld_base->base.gallivm;
2810 LLVMBuilderRef builder = gallivm->builder;
2811 struct lp_build_nir_soa_context *bld = (struct lp_build_nir_soa_context *)bld_base;
2812 struct lp_build_context *uint_bld = &bld_base->uint_bld;
2813 struct lp_build_context *store_bld;
2814 LLVMValueRef thread_offsets = get_scratch_thread_offsets(gallivm, uint_bld->type, bld->scratch_size);
2815 LLVMValueRef scratch_ptr_vec = lp_build_broadcast(gallivm,
2816 LLVMVectorType(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), uint_bld->type.length),
2817 bld->scratch_ptr);
2818 store_bld = get_int_bld(bld_base, true, bit_size);
2819
2820 LLVMValueRef exec_mask = mask_vec(bld_base);
2821 offset = lp_build_add(uint_bld, offset, thread_offsets);
2822
2823 for (unsigned c = 0; c < nc; c++) {
2824 if (!(writemask & (1u << c)))
2825 continue;
2826 LLVMValueRef val = (nc == 1) ? dst : LLVMBuildExtractValue(builder, dst, c, "");
2827
2828 LLVMValueRef chan_offset = lp_build_add(uint_bld, offset, lp_build_const_int_vec(gallivm, uint_bld->type, c * (bit_size / 8)));
2829
2830 val = LLVMBuildBitCast(builder, val, store_bld->vec_type, "");
2831
2832 lp_build_masked_scatter(gallivm, store_bld->type.length, bit_size,
2833 lp_vec_add_offset_ptr(bld_base, bit_size,
2834 scratch_ptr_vec, chan_offset),
2835 val, exec_mask);
2836 }
2837 }
2838
2839 static void
emit_clock(struct lp_build_nir_context * bld_base,LLVMValueRef dst[4])2840 emit_clock(struct lp_build_nir_context *bld_base,
2841 LLVMValueRef dst[4])
2842 {
2843 struct gallivm_state *gallivm = bld_base->base.gallivm;
2844 LLVMBuilderRef builder = gallivm->builder;
2845 struct lp_build_context *uint_bld = get_int_bld(bld_base, true, 32);
2846
2847 lp_init_clock_hook(gallivm);
2848
2849 LLVMTypeRef get_time_type = LLVMFunctionType(LLVMInt64TypeInContext(gallivm->context), NULL, 0, 1);
2850 LLVMValueRef result = LLVMBuildCall2(builder, get_time_type, gallivm->get_time_hook, NULL, 0, "");
2851
2852 LLVMValueRef hi = LLVMBuildShl(builder, result, lp_build_const_int64(gallivm, 32), "");
2853 hi = LLVMBuildTrunc(builder, hi, uint_bld->elem_type, "");
2854 LLVMValueRef lo = LLVMBuildTrunc(builder, result, uint_bld->elem_type, "");
2855 dst[0] = lp_build_broadcast_scalar(uint_bld, lo);
2856 dst[1] = lp_build_broadcast_scalar(uint_bld, hi);
2857 }
2858
2859 LLVMTypeRef
lp_build_cs_func_call_context(struct gallivm_state * gallivm,int length,LLVMTypeRef context_type,LLVMTypeRef resources_type)2860 lp_build_cs_func_call_context(struct gallivm_state *gallivm, int length,
2861 LLVMTypeRef context_type, LLVMTypeRef resources_type)
2862 {
2863 LLVMTypeRef args[LP_NIR_CALL_CONTEXT_MAX_ARGS];
2864
2865 args[LP_NIR_CALL_CONTEXT_CONTEXT] = LLVMPointerType(context_type, 0);
2866 args[LP_NIR_CALL_CONTEXT_RESOURCES] = LLVMPointerType(resources_type, 0);
2867 args[LP_NIR_CALL_CONTEXT_SHARED] = LLVMPointerType(LLVMInt32TypeInContext(gallivm->context), 0); /* shared_ptr */
2868 args[LP_NIR_CALL_CONTEXT_SCRATCH] = LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0); /* scratch ptr */
2869 args[LP_NIR_CALL_CONTEXT_WORK_DIM] = LLVMInt32TypeInContext(gallivm->context); /* work_dim */
2870 args[LP_NIR_CALL_CONTEXT_THREAD_ID_0] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[0] */
2871 args[LP_NIR_CALL_CONTEXT_THREAD_ID_1] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[1] */
2872 args[LP_NIR_CALL_CONTEXT_THREAD_ID_2] = LLVMVectorType(LLVMInt32TypeInContext(gallivm->context), length); /* system_values.thread_id[2] */
2873 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[0] */
2874 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[1] */
2875 args[LP_NIR_CALL_CONTEXT_BLOCK_ID_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_id[2] */
2876
2877 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[0] */
2878 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[1] */
2879 args[LP_NIR_CALL_CONTEXT_GRID_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.grid_size[2] */
2880 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[0] */
2881 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[1] */
2882 args[LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2] = LLVMInt32TypeInContext(gallivm->context); /* system_values.block_size[2] */
2883
2884 LLVMTypeRef stype = LLVMStructTypeInContext(gallivm->context, args, LP_NIR_CALL_CONTEXT_MAX_ARGS, 0);
2885 return stype;
2886 }
2887
2888 static void
build_call_context(struct lp_build_nir_soa_context * bld)2889 build_call_context(struct lp_build_nir_soa_context *bld)
2890 {
2891 struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
2892 bld->call_context_ptr = lp_build_alloca(gallivm, bld->call_context_type, "callcontext");
2893 LLVMValueRef call_context = LLVMGetUndef(bld->call_context_type);
2894 call_context = LLVMBuildInsertValue(gallivm->builder,
2895 call_context, bld->context_ptr, LP_NIR_CALL_CONTEXT_CONTEXT, "");
2896 call_context = LLVMBuildInsertValue(gallivm->builder,
2897 call_context, bld->resources_ptr, LP_NIR_CALL_CONTEXT_RESOURCES, "");
2898 if (bld->shared_ptr) {
2899 call_context = LLVMBuildInsertValue(gallivm->builder,
2900 call_context, bld->shared_ptr, LP_NIR_CALL_CONTEXT_SHARED, "");
2901 } else {
2902 call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2903 LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2904 LP_NIR_CALL_CONTEXT_SHARED, "");
2905 }
2906 if (bld->scratch_ptr) {
2907 call_context = LLVMBuildInsertValue(gallivm->builder,
2908 call_context, bld->scratch_ptr, LP_NIR_CALL_CONTEXT_SCRATCH, "");
2909 } else {
2910 call_context = LLVMBuildInsertValue(gallivm->builder, call_context,
2911 LLVMConstNull(LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0)),
2912 LP_NIR_CALL_CONTEXT_SCRATCH, "");
2913 }
2914 call_context = LLVMBuildInsertValue(gallivm->builder,
2915 call_context, bld->system_values.work_dim, LP_NIR_CALL_CONTEXT_WORK_DIM, "");
2916 call_context = LLVMBuildInsertValue(gallivm->builder,
2917 call_context, bld->system_values.thread_id[0], LP_NIR_CALL_CONTEXT_THREAD_ID_0, "");
2918 call_context = LLVMBuildInsertValue(gallivm->builder,
2919 call_context, bld->system_values.thread_id[1], LP_NIR_CALL_CONTEXT_THREAD_ID_1, "");
2920 call_context = LLVMBuildInsertValue(gallivm->builder,
2921 call_context, bld->system_values.thread_id[2], LP_NIR_CALL_CONTEXT_THREAD_ID_2, "");
2922 call_context = LLVMBuildInsertValue(gallivm->builder,
2923 call_context, bld->system_values.block_id[0], LP_NIR_CALL_CONTEXT_BLOCK_ID_0, "");
2924 call_context = LLVMBuildInsertValue(gallivm->builder,
2925 call_context, bld->system_values.block_id[1], LP_NIR_CALL_CONTEXT_BLOCK_ID_1, "");
2926 call_context = LLVMBuildInsertValue(gallivm->builder,
2927 call_context, bld->system_values.block_id[2], LP_NIR_CALL_CONTEXT_BLOCK_ID_2, "");
2928 call_context = LLVMBuildInsertValue(gallivm->builder,
2929 call_context, bld->system_values.grid_size[0], LP_NIR_CALL_CONTEXT_GRID_SIZE_0, "");
2930 call_context = LLVMBuildInsertValue(gallivm->builder,
2931 call_context, bld->system_values.grid_size[1], LP_NIR_CALL_CONTEXT_GRID_SIZE_1, "");
2932 call_context = LLVMBuildInsertValue(gallivm->builder,
2933 call_context, bld->system_values.grid_size[2], LP_NIR_CALL_CONTEXT_GRID_SIZE_2, "");
2934 call_context = LLVMBuildInsertValue(gallivm->builder,
2935 call_context, bld->system_values.block_size[0], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_0, "");
2936 call_context = LLVMBuildInsertValue(gallivm->builder,
2937 call_context, bld->system_values.block_size[1], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_1, "");
2938 call_context = LLVMBuildInsertValue(gallivm->builder,
2939 call_context, bld->system_values.block_size[2], LP_NIR_CALL_CONTEXT_BLOCK_SIZE_2, "");
2940 LLVMBuildStore(gallivm->builder, call_context, bld->call_context_ptr);
2941 }
2942
lp_build_nir_soa_func(struct gallivm_state * gallivm,struct nir_shader * shader,nir_function_impl * impl,const struct lp_build_tgsi_params * params,LLVMValueRef (* outputs)[4])2943 void lp_build_nir_soa_func(struct gallivm_state *gallivm,
2944 struct nir_shader *shader,
2945 nir_function_impl *impl,
2946 const struct lp_build_tgsi_params *params,
2947 LLVMValueRef (*outputs)[4])
2948 {
2949 struct lp_build_nir_soa_context bld;
2950 const struct lp_type type = params->type;
2951 struct lp_type res_type;
2952
2953 assert(type.length <= LP_MAX_VECTOR_LENGTH);
2954 memset(&res_type, 0, sizeof res_type);
2955 res_type.width = type.width;
2956 res_type.length = type.length;
2957 res_type.sign = 1;
2958
2959 /* Setup build context */
2960 memset(&bld, 0, sizeof bld);
2961 lp_build_context_init(&bld.bld_base.base, gallivm, type);
2962 lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type));
2963 lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type));
2964 lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type));
2965 lp_build_context_init(&bld.uint_elem_bld, gallivm, lp_elem_type(lp_uint_type(type)));
2966 {
2967 struct lp_type dbl_type;
2968 dbl_type = type;
2969 dbl_type.width *= 2;
2970 lp_build_context_init(&bld.bld_base.dbl_bld, gallivm, dbl_type);
2971 }
2972 {
2973 struct lp_type half_type;
2974 half_type = type;
2975 half_type.width /= 2;
2976 lp_build_context_init(&bld.bld_base.half_bld, gallivm, half_type);
2977 }
2978 {
2979 struct lp_type uint64_type;
2980 uint64_type = lp_uint_type(type);
2981 uint64_type.width *= 2;
2982 lp_build_context_init(&bld.bld_base.uint64_bld, gallivm, uint64_type);
2983 }
2984 {
2985 struct lp_type int64_type;
2986 int64_type = lp_int_type(type);
2987 int64_type.width *= 2;
2988 lp_build_context_init(&bld.bld_base.int64_bld, gallivm, int64_type);
2989 }
2990 {
2991 struct lp_type uint16_type;
2992 uint16_type = lp_uint_type(type);
2993 uint16_type.width /= 2;
2994 lp_build_context_init(&bld.bld_base.uint16_bld, gallivm, uint16_type);
2995 }
2996 {
2997 struct lp_type int16_type;
2998 int16_type = lp_int_type(type);
2999 int16_type.width /= 2;
3000 lp_build_context_init(&bld.bld_base.int16_bld, gallivm, int16_type);
3001 }
3002 {
3003 struct lp_type uint8_type;
3004 uint8_type = lp_uint_type(type);
3005 uint8_type.width /= 4;
3006 lp_build_context_init(&bld.bld_base.uint8_bld, gallivm, uint8_type);
3007 }
3008 {
3009 struct lp_type int8_type;
3010 int8_type = lp_int_type(type);
3011 int8_type.width /= 4;
3012 lp_build_context_init(&bld.bld_base.int8_bld, gallivm, int8_type);
3013 }
3014 bld.bld_base.load_var = emit_load_var;
3015 bld.bld_base.store_var = emit_store_var;
3016 bld.bld_base.load_reg = emit_load_reg;
3017 bld.bld_base.store_reg = emit_store_reg;
3018 bld.bld_base.emit_var_decl = emit_var_decl;
3019 bld.bld_base.load_ubo = emit_load_ubo;
3020 bld.bld_base.load_kernel_arg = emit_load_kernel_arg;
3021 bld.bld_base.load_global = emit_load_global;
3022 bld.bld_base.store_global = emit_store_global;
3023 bld.bld_base.atomic_global = emit_atomic_global;
3024 bld.bld_base.tex = emit_tex;
3025 bld.bld_base.tex_size = emit_tex_size;
3026 bld.bld_base.bgnloop = bgnloop;
3027 bld.bld_base.endloop = endloop;
3028 bld.bld_base.if_cond = if_cond;
3029 bld.bld_base.else_stmt = else_stmt;
3030 bld.bld_base.endif_stmt = endif_stmt;
3031 bld.bld_base.break_stmt = break_stmt;
3032 bld.bld_base.continue_stmt = continue_stmt;
3033 bld.bld_base.sysval_intrin = emit_sysval_intrin;
3034 bld.bld_base.discard = discard;
3035 bld.bld_base.emit_vertex = emit_vertex;
3036 bld.bld_base.end_primitive = end_primitive;
3037 bld.bld_base.load_mem = emit_load_mem;
3038 bld.bld_base.store_mem = emit_store_mem;
3039 bld.bld_base.get_ssbo_size = emit_get_ssbo_size;
3040 bld.bld_base.atomic_mem = emit_atomic_mem;
3041 bld.bld_base.barrier = emit_barrier;
3042 bld.bld_base.image_op = emit_image_op;
3043 bld.bld_base.image_size = emit_image_size;
3044 bld.bld_base.vote = emit_vote;
3045 bld.bld_base.elect = emit_elect;
3046 bld.bld_base.reduce = emit_reduce;
3047 bld.bld_base.ballot = emit_ballot;
3048 #if LLVM_VERSION_MAJOR >= 10
3049 bld.bld_base.shuffle = emit_shuffle;
3050 #endif
3051 bld.bld_base.read_invocation = emit_read_invocation;
3052 bld.bld_base.helper_invocation = emit_helper_invocation;
3053 bld.bld_base.interp_at = emit_interp_at;
3054 bld.bld_base.call = emit_call;
3055 bld.bld_base.load_scratch = emit_load_scratch;
3056 bld.bld_base.store_scratch = emit_store_scratch;
3057 bld.bld_base.load_const = emit_load_const;
3058 bld.bld_base.clock = emit_clock;
3059 bld.bld_base.set_vertex_and_primitive_count = emit_set_vertex_and_primitive_count;
3060 bld.bld_base.launch_mesh_workgroups = emit_launch_mesh_workgroups;
3061
3062 bld.bld_base.fns = params->fns;
3063 bld.bld_base.func = params->current_func;
3064 bld.mask = params->mask;
3065 bld.inputs = params->inputs;
3066 bld.outputs = outputs;
3067 bld.consts_ptr = params->consts_ptr;
3068 bld.ssbo_ptr = params->ssbo_ptr;
3069 bld.sampler = params->sampler;
3070
3071 bld.context_type = params->context_type;
3072 bld.context_ptr = params->context_ptr;
3073 bld.resources_type = params->resources_type;
3074 bld.resources_ptr = params->resources_ptr;
3075 bld.thread_data_type = params->thread_data_type;
3076 bld.thread_data_ptr = params->thread_data_ptr;
3077 bld.bld_base.aniso_filter_table = params->aniso_filter_table;
3078 bld.image = params->image;
3079 bld.shared_ptr = params->shared_ptr;
3080 bld.payload_ptr = params->payload_ptr;
3081 bld.coro = params->coro;
3082 bld.kernel_args_ptr = params->kernel_args;
3083 bld.num_inputs = params->num_inputs;
3084 bld.indirects = 0;
3085 if (shader->info.inputs_read_indirectly)
3086 bld.indirects |= nir_var_shader_in;
3087
3088 bld.gs_iface = params->gs_iface;
3089 bld.tcs_iface = params->tcs_iface;
3090 bld.tes_iface = params->tes_iface;
3091 bld.fs_iface = params->fs_iface;
3092 bld.mesh_iface = params->mesh_iface;
3093 if (bld.gs_iface) {
3094 struct lp_build_context *uint_bld = &bld.bld_base.uint_bld;
3095
3096 bld.gs_vertex_streams = params->gs_vertex_streams;
3097 bld.max_output_vertices_vec = lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type,
3098 shader->info.gs.vertices_out);
3099 for (int i = 0; i < params->gs_vertex_streams; i++) {
3100 bld.emitted_prims_vec_ptr[i] =
3101 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_prims_ptr");
3102 bld.emitted_vertices_vec_ptr[i] =
3103 lp_build_alloca(gallivm, uint_bld->vec_type, "emitted_vertices_ptr");
3104 bld.total_emitted_vertices_vec_ptr[i] =
3105 lp_build_alloca(gallivm, uint_bld->vec_type, "total_emitted_vertices_ptr");
3106 }
3107 }
3108 lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld);
3109
3110 if (params->system_values)
3111 bld.system_values = *params->system_values;
3112
3113 bld.bld_base.shader = shader;
3114
3115 bld.scratch_size = ALIGN(shader->scratch_size, 8);
3116 if (params->scratch_ptr)
3117 bld.scratch_ptr = params->scratch_ptr;
3118 else if (shader->scratch_size) {
3119 bld.scratch_ptr = lp_build_array_alloca(gallivm,
3120 LLVMInt8TypeInContext(gallivm->context),
3121 lp_build_const_int32(gallivm, bld.scratch_size * type.length),
3122 "scratch");
3123 }
3124
3125 if (!exec_list_is_singular(&shader->functions)) {
3126 bld.call_context_type = lp_build_cs_func_call_context(gallivm, type.length, bld.context_type, bld.resources_type);
3127 if (!params->call_context_ptr) {
3128 build_call_context(&bld);
3129 } else
3130 bld.call_context_ptr = params->call_context_ptr;
3131 }
3132
3133 emit_prologue(&bld);
3134 lp_build_nir_llvm(&bld.bld_base, shader, impl);
3135
3136 if (bld.gs_iface) {
3137 LLVMBuilderRef builder = bld.bld_base.base.gallivm->builder;
3138 LLVMValueRef total_emitted_vertices_vec;
3139 LLVMValueRef emitted_prims_vec;
3140
3141 for (int i = 0; i < params->gs_vertex_streams; i++) {
3142 end_primitive_masked(&bld.bld_base, lp_build_mask_value(bld.mask), i);
3143
3144 total_emitted_vertices_vec =
3145 LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.total_emitted_vertices_vec_ptr[i], "");
3146
3147 emitted_prims_vec =
3148 LLVMBuildLoad2(builder, bld.bld_base.uint_bld.vec_type, bld.emitted_prims_vec_ptr[i], "");
3149 bld.gs_iface->gs_epilogue(bld.gs_iface,
3150 total_emitted_vertices_vec,
3151 emitted_prims_vec, i);
3152 }
3153 }
3154 lp_exec_mask_fini(&bld.exec_mask);
3155 }
3156
/*
 * Convenience entry point: run the common NIR lowering prepasses, then
 * generate SoA LLVM IR for the shader's entrypoint function.
 */
void lp_build_nir_soa(struct gallivm_state *gallivm,
                      struct nir_shader *shader,
                      const struct lp_build_tgsi_params *params,
                      LLVMValueRef (*outputs)[4])
{
   nir_function_impl *entrypoint;

   lp_build_nir_prepasses(shader);
   entrypoint = nir_shader_get_entrypoint(shader);
   lp_build_nir_soa_func(gallivm, shader, entrypoint, params, outputs);
}
3167