1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * Copyright 2007-2008 VMware, Inc.
5 * All Rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * @file
31 * Position and shader input interpolation.
32 *
33 * @author Jose Fonseca <[email protected]>
34 */
35
36 #include "pipe/p_shader_tokens.h"
37 #include "util/compiler.h"
38 #include "util/u_debug.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "gallivm/lp_bld_flow.h"
46 #include "gallivm/lp_bld_logic.h"
47 #include "gallivm/lp_bld_struct.h"
48 #include "gallivm/lp_bld_gather.h"
49 #include "lp_bld_interp.h"
50
51
52 /*
53 * The shader JIT function operates on blocks of quads.
54 * Each block has 2x2 quads and each quad has 2x2 pixels.
55 *
56 * We iterate over the quads in order 0, 1, 2, 3:
57 *
 *   #################
 *   #   |   #   |   #
 *   #---0---#---1---#
 *   #   |   #   |   #
 *   #################
 *   #   |   #   |   #
 *   #---2---#---3---#
 *   #   |   #   |   #
 *   #################
67 *
68 * If we iterate over multiple quads at once, quads 01 and 23 are processed
69 * together.
70 *
71 * Within each quad, we have four pixels which are represented in SOA
72 * order:
73 *
 *   #########
 *   # 0 | 1 #
 *   #---+---#
 *   # 2 | 3 #
 *   #########
79 *
80 * So the green channel (for example) of the four pixels is stored in
81 * a single vector register: {g0, g1, g2, g3}.
82 * The order stays the same even with multiple quads:
83 * 0 1 4 5
84 * 2 3 6 7
85 * is stored as g0..g7
86 */
87
88
89 /**
90 * Do one perspective divide per quad.
91 *
92 * For perspective interpolation, the final attribute value is given
93 *
94 * a' = a/w = a * oow
95 *
96 * where
97 *
98 * a = a0 + dadx*x + dady*y
99 * w = w0 + dwdx*x + dwdy*y
100 * oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
101 *
102 * Instead of computing the division per pixel, with this macro we compute the
103 * division on the upper left pixel of each quad, and use a linear
104 * approximation in the remaining pixels, given by:
105 *
106 * da'dx = (dadx - dwdx*a)*oow
107 * da'dy = (dady - dwdy*a)*oow
108 *
109 * Ironically, this actually makes things slower -- probably because the
110 * divide hardware unit is rarely used, whereas the multiply unit is typically
111 * already saturated.
112 */
/*
 * NOTE(review): nothing in the visible part of this file tests this macro,
 * so the per-quad divide described above does not appear to be wired up
 * here -- confirm before changing the value.
 */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0
114
115
/*
 * X and Y pixel offsets for up to 16 pixels, indexed by the pixel's position
 * in the SOA vector.  Each row below covers the four pixels of one quad.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3,
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3,
};
118
119
120 static void
attrib_name(LLVMValueRef val,unsigned attrib,unsigned chan,const char * suffix)121 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
122 {
123 if (attrib == 0)
124 lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
125 else
126 lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
127 }
128
129
130 static void
calc_offsets(struct lp_build_context * coeff_bld,unsigned quad_start_index,LLVMValueRef * pixoffx,LLVMValueRef * pixoffy)131 calc_offsets(struct lp_build_context *coeff_bld,
132 unsigned quad_start_index,
133 LLVMValueRef *pixoffx,
134 LLVMValueRef *pixoffy)
135 {
136 unsigned num_pix = coeff_bld->type.length;
137 struct gallivm_state *gallivm = coeff_bld->gallivm;
138 LLVMBuilderRef builder = coeff_bld->gallivm->builder;
139 LLVMValueRef nr, pixxf, pixyf;
140
141 *pixoffx = coeff_bld->undef;
142 *pixoffy = coeff_bld->undef;
143
144 for (unsigned i = 0; i < num_pix; i++) {
145 nr = lp_build_const_int32(gallivm, i);
146 pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
147 (quad_start_index & 1) * 2);
148 pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
149 (quad_start_index & 2));
150 *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
151 *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
152 }
153 }
154
155
156 static void
calc_centroid_offsets(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMTypeRef mask_type,LLVMValueRef mask_store,LLVMValueRef pix_center_offset,LLVMValueRef * centroid_x,LLVMValueRef * centroid_y)157 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158 struct gallivm_state *gallivm,
159 LLVMValueRef loop_iter,
160 LLVMTypeRef mask_type,
161 LLVMValueRef mask_store,
162 LLVMValueRef pix_center_offset,
163 LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
164 {
165 struct lp_build_context *coeff_bld = &bld->coeff_bld;
166 LLVMBuilderRef builder = gallivm->builder;
167 LLVMValueRef s_mask_and = NULL;
168 LLVMValueRef centroid_x_offset = pix_center_offset;
169 LLVMValueRef centroid_y_offset = pix_center_offset;
170 for (int s = bld->coverage_samples - 1; s >= 0; s--) {
171 LLVMValueRef sample_cov;
172 LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
173
174 s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
175 sample_cov = lp_build_pointer_get2(builder, mask_type, mask_store, s_mask_idx);
176 if (s == bld->coverage_samples - 1)
177 s_mask_and = sample_cov;
178 else
179 s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
180
181 LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
182 LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
183
184 x_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
185 bld->sample_pos_array, x_val_idx);
186 y_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
187 bld->sample_pos_array, y_val_idx);
188 x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
189 y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
190 centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
191 centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
192 }
193 *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
194 *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
195 }
196
197
198 /* Note: this assumes the pointer to elem_type is in address space 0 */
199 static LLVMValueRef
load_casted(LLVMBuilderRef builder,LLVMTypeRef elem_type,LLVMValueRef ptr,const char * name)200 load_casted(LLVMBuilderRef builder, LLVMTypeRef elem_type,
201 LLVMValueRef ptr, const char *name) {
202 ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(elem_type, 0), name);
203 return LLVMBuildLoad2(builder, elem_type, ptr, name);
204 }
205
206
207 static LLVMValueRef
indexed_load(LLVMBuilderRef builder,LLVMTypeRef gep_type,LLVMTypeRef elem_type,LLVMValueRef ptr,LLVMValueRef index,const char * name)208 indexed_load(LLVMBuilderRef builder, LLVMTypeRef gep_type,
209 LLVMTypeRef elem_type, LLVMValueRef ptr,
210 LLVMValueRef index, const char *name) {
211 ptr = LLVMBuildGEP2(builder, gep_type, ptr, &index, 1, name);
212 return load_casted(builder, elem_type, ptr, name);
213 }
214
215
216 /* Much easier, and significantly less instructions in the per-stamp
217 * part (less than half) but overall more instructions so a loss if
218 * most quads are active. Might be a win though with larger vectors.
219 * No ability to do per-quad divide (doable but not implemented)
220 * Could be made to work with passed in pixel offsets (i.e. active quad
221 * merging).
222 */
223 static void
coeffs_init_simple(struct lp_build_interp_soa_context * bld,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr)224 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
225 LLVMValueRef a0_ptr,
226 LLVMValueRef dadx_ptr,
227 LLVMValueRef dady_ptr)
228 {
229 struct lp_build_context *coeff_bld = &bld->coeff_bld;
230 struct lp_build_context *setup_bld = &bld->setup_bld;
231 struct gallivm_state *gallivm = coeff_bld->gallivm;
232 LLVMBuilderRef builder = gallivm->builder;
233
234 for (unsigned attrib = 0; attrib < bld->num_attribs; ++attrib) {
235 /*
236 * always fetch all 4 values for performance/simplicity
237 * Note: we do that here because it seems to generate better
238 * code. It generates a lot of moves initially but less
239 * moves later. As far as I can tell this looks like a
240 * llvm issue, instead of simply reloading the values from
241 * the passed in pointers it if it runs out of registers
242 * it spills/reloads them. Maybe some optimization passes
243 * would help.
244 * Might want to investigate this again later.
245 */
246 const enum lp_interp interp = bld->interp[attrib];
247 LLVMValueRef index = lp_build_const_int32(gallivm,
248 attrib * TGSI_NUM_CHANNELS);
249 LLVMValueRef dadxaos = setup_bld->zero;
250 LLVMValueRef dadyaos = setup_bld->zero;
251 LLVMValueRef a0aos = setup_bld->zero;
252
253 /* See: lp_state_fs.c / generate_fragment() / fs_elem_type */
254 LLVMTypeRef fs_elem_type = LLVMFloatTypeInContext(gallivm->context);
255
256 switch (interp) {
257 case LP_INTERP_PERSPECTIVE:
258 FALLTHROUGH;
259
260 case LP_INTERP_LINEAR:
261 dadxaos = indexed_load(builder, fs_elem_type,
262 setup_bld->vec_type, dadx_ptr, index, "");
263 dadyaos = indexed_load(builder, fs_elem_type,
264 setup_bld->vec_type, dady_ptr, index, "");
265 attrib_name(dadxaos, attrib, 0, ".dadxaos");
266 attrib_name(dadyaos, attrib, 0, ".dadyaos");
267 FALLTHROUGH;
268
269 case LP_INTERP_CONSTANT:
270 case LP_INTERP_FACING:
271 a0aos = indexed_load(builder, fs_elem_type,
272 setup_bld->vec_type, a0_ptr, index, "");
273 attrib_name(a0aos, attrib, 0, ".a0aos");
274 break;
275
276 case LP_INTERP_POSITION:
277 /* Nothing to do as the position coeffs are already setup in slot 0 */
278 continue;
279
280 default:
281 assert(0);
282 break;
283 }
284 bld->a0aos[attrib] = a0aos;
285 bld->dadxaos[attrib] = dadxaos;
286 bld->dadyaos[attrib] = dadyaos;
287 }
288 }
289
290
291 /**
292 * Interpolate the shader input attribute values.
293 * This is called for each (group of) quad(s).
294 */
295 static void
attribs_update_simple(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMTypeRef mask_type,LLVMValueRef mask_store,LLVMValueRef sample_id,int start,int end)296 attribs_update_simple(struct lp_build_interp_soa_context *bld,
297 struct gallivm_state *gallivm,
298 LLVMValueRef loop_iter,
299 LLVMTypeRef mask_type,
300 LLVMValueRef mask_store,
301 LLVMValueRef sample_id,
302 int start,
303 int end)
304 {
305 LLVMBuilderRef builder = gallivm->builder;
306 struct lp_build_context *coeff_bld = &bld->coeff_bld;
307 struct lp_build_context *setup_bld = &bld->setup_bld;
308 LLVMValueRef oow = NULL;
309 LLVMValueRef pixoffx;
310 LLVMValueRef pixoffy;
311 LLVMValueRef ptr;
312 LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm,
313 coeff_bld->type, 0.5);
314
315 /* could do this with code-generated passed in pixel offsets too */
316
317 assert(loop_iter);
318 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store,
319 &loop_iter, 1, "");
320 pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
321 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store,
322 &loop_iter, 1, "");
323 pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
324
325 pixoffx = LLVMBuildFAdd(builder, pixoffx,
326 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
327 pixoffy = LLVMBuildFAdd(builder, pixoffy,
328 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
329
330 for (unsigned attrib = start; attrib < end; attrib++) {
331 const unsigned mask = bld->mask[attrib];
332 const enum lp_interp interp = bld->interp[attrib];
333 const enum tgsi_interpolate_loc loc = bld->interp_loc[attrib];
334
335 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
336 if (mask & (1 << chan)) {
337 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
338 LLVMValueRef dadx = coeff_bld->zero;
339 LLVMValueRef dady = coeff_bld->zero;
340 LLVMValueRef a = coeff_bld->zero;
341 LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
342
343 switch (interp) {
344 case LP_INTERP_PERSPECTIVE:
345 FALLTHROUGH;
346
347 case LP_INTERP_LINEAR:
348 if (attrib == 0 && chan == 0) {
349 dadx = coeff_bld->one;
350 if (sample_id) {
351 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
352 x_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
353 bld->sample_pos_array, x_val_idx);
354 a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
355 } else {
356 a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
357 }
358 }
359 else if (attrib == 0 && chan == 1) {
360 dady = coeff_bld->one;
361 if (sample_id) {
362 LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
363 y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
364 y_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
365 bld->sample_pos_array, y_val_idx);
366 a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
367 } else {
368 a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
369 }
370 }
371 else {
372 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
373 coeff_bld->type, bld->dadxaos[attrib],
374 index);
375 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
376 coeff_bld->type, bld->dadyaos[attrib],
377 index);
378 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
379 coeff_bld->type, bld->a0aos[attrib],
380 index);
381
382 if (bld->coverage_samples > 1) {
383 LLVMValueRef xoffset = pix_center_offset;
384 LLVMValueRef yoffset = pix_center_offset;
385 if (loc == TGSI_INTERPOLATE_LOC_SAMPLE ||
386 (attrib == 0 && chan == 2 && sample_id)) {
387 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
388 LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
389
390 x_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
391 bld->sample_pos_array, x_val_idx);
392 y_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
393 bld->sample_pos_array, y_val_idx);
394 xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
395 yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
396 } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
397 calc_centroid_offsets(bld, gallivm, loop_iter, mask_type, mask_store,
398 pix_center_offset, &xoffset, &yoffset);
399 }
400 chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
401 chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
402 }
403 }
404
405 /*
406 * a = a0 + (x * dadx + y * dady)
407 */
408 a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
409 a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
410
411 if (interp == LP_INTERP_PERSPECTIVE) {
412 if (oow == NULL) {
413 LLVMValueRef w = bld->attribs[0][3];
414 assert(attrib != 0);
415 assert(bld->mask[0] & TGSI_WRITEMASK_W);
416 oow = lp_build_rcp(coeff_bld, w);
417 }
418 a = lp_build_mul(coeff_bld, a, oow);
419 }
420 break;
421
422 case LP_INTERP_CONSTANT:
423 case LP_INTERP_FACING:
424 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
425 coeff_bld->type, bld->a0aos[attrib],
426 index);
427 break;
428
429 case LP_INTERP_POSITION:
430 assert(attrib > 0);
431 a = bld->attribs[0][chan];
432 break;
433
434 default:
435 assert(0);
436 break;
437 }
438
439 if ((attrib == 0) && (chan == 2)) {
440 /* add polygon-offset value, stored in the X component of a0 */
441 LLVMValueRef offset =
442 lp_build_extract_broadcast(gallivm, setup_bld->type,
443 coeff_bld->type, bld->a0aos[0],
444 lp_build_const_int32(gallivm, 0));
445 a = LLVMBuildFAdd(builder, a, offset, "");
446 }
447
448 bld->attribs[attrib][chan] = a;
449 }
450 }
451 }
452 }
453
454
455 static LLVMValueRef
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned attrib,unsigned chan,LLVMValueRef indir_index,LLVMValueRef pixoffx,LLVMValueRef pixoffy)456 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
457 struct gallivm_state *gallivm,
458 unsigned attrib, unsigned chan,
459 LLVMValueRef indir_index,
460 LLVMValueRef pixoffx,
461 LLVMValueRef pixoffy)
462 {
463 LLVMBuilderRef builder = gallivm->builder;
464 struct lp_build_context *coeff_bld = &bld->coeff_bld;
465 const enum lp_interp interp = bld->interp[attrib];
466 LLVMValueRef dadx = coeff_bld->zero;
467 LLVMValueRef dady = coeff_bld->zero;
468 LLVMValueRef a = coeff_bld->zero;
469 LLVMTypeRef u8ptr =
470 LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
471
472 indir_index = LLVMBuildAdd(builder, indir_index,
473 lp_build_const_int_vec(gallivm, coeff_bld->type,
474 attrib), "");
475 LLVMValueRef index = LLVMBuildMul(builder, indir_index,
476 lp_build_const_int_vec(gallivm,
477 coeff_bld->type,
478 4), "");
479 index = LLVMBuildAdd(builder, index,
480 lp_build_const_int_vec(gallivm,
481 coeff_bld->type, chan), "");
482
483 /* size up to byte indices */
484 index = LLVMBuildMul(builder, index,
485 lp_build_const_int_vec(gallivm, coeff_bld->type, 4),
486 "");
487
488 struct lp_type dst_type = coeff_bld->type;
489 dst_type.length = 1;
490 switch (interp) {
491 case LP_INTERP_PERSPECTIVE:
492 FALLTHROUGH;
493 case LP_INTERP_LINEAR:
494 dadx = lp_build_gather(gallivm, coeff_bld->type.length,
495 coeff_bld->type.width, dst_type,
496 true, LLVMBuildBitCast(builder, bld->dadx_ptr,
497 u8ptr, ""),
498 index, false);
499
500 dady = lp_build_gather(gallivm, coeff_bld->type.length,
501 coeff_bld->type.width, dst_type,
502 true, LLVMBuildBitCast(builder, bld->dady_ptr,
503 u8ptr, ""),
504 index, false);
505
506 a = lp_build_gather(gallivm, coeff_bld->type.length,
507 coeff_bld->type.width, dst_type,
508 true, LLVMBuildBitCast(builder, bld->a0_ptr,
509 u8ptr, ""),
510 index, false);
511
512 /*
513 * a = a0 + (x * dadx + y * dady)
514 */
515 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
516 a = lp_build_fmuladd(builder, dady, pixoffy, a);
517
518 if (interp == LP_INTERP_PERSPECTIVE) {
519 LLVMValueRef w = bld->attribs[0][3];
520 assert(attrib != 0);
521 assert(bld->mask[0] & TGSI_WRITEMASK_W);
522 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
523 a = lp_build_mul(coeff_bld, a, oow);
524 }
525
526 break;
527 case LP_INTERP_CONSTANT:
528 case LP_INTERP_FACING:
529 a = lp_build_gather(gallivm, coeff_bld->type.length,
530 coeff_bld->type.width, dst_type,
531 true, LLVMBuildBitCast(builder, bld->a0_ptr,
532 u8ptr, ""),
533 index, false);
534 break;
535 default:
536 assert(0);
537 break;
538 }
539 return a;
540 }
541
542
543 LLVMValueRef
lp_build_interp_soa(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMTypeRef mask_type,LLVMValueRef mask_store,unsigned attrib,unsigned chan,enum tgsi_interpolate_loc loc,LLVMValueRef indir_index,LLVMValueRef offsets[2])544 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
545 struct gallivm_state *gallivm,
546 LLVMValueRef loop_iter,
547 LLVMTypeRef mask_type,
548 LLVMValueRef mask_store,
549 unsigned attrib, unsigned chan,
550 enum tgsi_interpolate_loc loc,
551 LLVMValueRef indir_index,
552 LLVMValueRef offsets[2])
553 {
554 LLVMBuilderRef builder = gallivm->builder;
555 struct lp_build_context *coeff_bld = &bld->coeff_bld;
556 struct lp_build_context *setup_bld = &bld->setup_bld;
557 LLVMValueRef pixoffx;
558 LLVMValueRef pixoffy;
559 LLVMValueRef ptr;
560
561 /* could do this with code-generated passed in pixel offsets too */
562
563 assert(loop_iter);
564 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store,
565 &loop_iter, 1, "");
566 pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
567 ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store,
568 &loop_iter, 1, "");
569 pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
570
571 pixoffx = LLVMBuildFAdd(builder, pixoffx,
572 lp_build_broadcast_scalar(coeff_bld, bld->x), "");
573 pixoffy = LLVMBuildFAdd(builder, pixoffy,
574 lp_build_broadcast_scalar(coeff_bld, bld->y), "");
575
576 LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm,
577 coeff_bld->type, 0.5);
578
579 if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
580 if (bld->coverage_samples > 1) {
581 pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
582 pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
583 }
584
585 if (offsets[0])
586 pixoffx = LLVMBuildFAdd(builder, pixoffx,
587 offsets[0], "");
588 if (offsets[1])
589 pixoffy = LLVMBuildFAdd(builder, pixoffy,
590 offsets[1], "");
591 } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
592 LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0],
593 lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
594 LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx,
595 lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
596
597 LLVMValueRef base_ptr =
598 LLVMBuildBitCast(gallivm->builder,
599 bld->sample_pos_array,
600 LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
601 LLVMValueRef xoffset = lp_build_gather(gallivm,
602 bld->coeff_bld.type.length,
603 bld->coeff_bld.type.width,
604 lp_elem_type(bld->coeff_bld.type),
605 false,
606 base_ptr,
607 x_val_idx, true);
608 LLVMValueRef yoffset = lp_build_gather(gallivm,
609 bld->coeff_bld.type.length,
610 bld->coeff_bld.type.width,
611 lp_elem_type(bld->coeff_bld.type),
612 false,
613 base_ptr,
614 y_val_idx, true);
615
616 if (bld->coverage_samples > 1) {
617 pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
618 pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
619 }
620 } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
621 LLVMValueRef centroid_x_offset, centroid_y_offset;
622
623 /* for centroid find covered samples for this quad. */
624 /* if all samples are covered use pixel centers */
625 if (bld->coverage_samples > 1) {
626 calc_centroid_offsets(bld, gallivm, loop_iter, mask_type, mask_store,
627 pix_center_offset, ¢roid_x_offset,
628 ¢roid_y_offset);
629
630 pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
631 pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
632 }
633 }
634
635 // remap attrib properly.
636 attrib++;
637
638 if (indir_index)
639 return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
640 indir_index, pixoffx, pixoffy);
641
642
643 const enum lp_interp interp = bld->interp[attrib];
644 LLVMValueRef dadx = coeff_bld->zero;
645 LLVMValueRef dady = coeff_bld->zero;
646 LLVMValueRef a = coeff_bld->zero;
647
648 LLVMValueRef index = lp_build_const_int32(gallivm, chan);
649
650 switch (interp) {
651 case LP_INTERP_PERSPECTIVE:
652 FALLTHROUGH;
653 case LP_INTERP_LINEAR:
654 dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
655 coeff_bld->type, bld->dadxaos[attrib],
656 index);
657
658 dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
659 coeff_bld->type, bld->dadyaos[attrib],
660 index);
661
662 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
663 coeff_bld->type, bld->a0aos[attrib],
664 index);
665
666 /*
667 * a = a0 + (x * dadx + y * dady)
668 */
669 a = lp_build_fmuladd(builder, dadx, pixoffx, a);
670 a = lp_build_fmuladd(builder, dady, pixoffy, a);
671
672 if (interp == LP_INTERP_PERSPECTIVE) {
673 LLVMValueRef w = bld->attribs[0][3];
674 assert(attrib != 0);
675 assert(bld->mask[0] & TGSI_WRITEMASK_W);
676 LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
677 a = lp_build_mul(coeff_bld, a, oow);
678 }
679
680 break;
681 case LP_INTERP_CONSTANT:
682 case LP_INTERP_FACING:
683 a = lp_build_extract_broadcast(gallivm, setup_bld->type,
684 coeff_bld->type, bld->a0aos[attrib],
685 index);
686 break;
687 default:
688 assert(0);
689 break;
690 }
691 return a;
692 }
693
694
695 /**
696 * Generate the position vectors.
697 *
698 * Parameter x0, y0 are the integer values with upper left coordinates.
699 */
700 static void
pos_init(struct lp_build_interp_soa_context * bld,LLVMValueRef x0,LLVMValueRef y0)701 pos_init(struct lp_build_interp_soa_context *bld,
702 LLVMValueRef x0,
703 LLVMValueRef y0)
704 {
705 LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
706 struct lp_build_context *coeff_bld = &bld->coeff_bld;
707
708 bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
709 bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
710 }
711
712
713 /**
714 * Initialize fragment shader input attribute info.
715 */
716 void
lp_build_interp_soa_init(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned num_inputs,const struct lp_shader_input * inputs,bool pixel_center_integer,unsigned coverage_samples,LLVMTypeRef sample_pos_array_type,LLVMValueRef sample_pos_array,LLVMValueRef num_loop,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr,LLVMValueRef x0,LLVMValueRef y0)717 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
718 struct gallivm_state *gallivm,
719 unsigned num_inputs,
720 const struct lp_shader_input *inputs,
721 bool pixel_center_integer,
722 unsigned coverage_samples,
723 LLVMTypeRef sample_pos_array_type,
724 LLVMValueRef sample_pos_array,
725 LLVMValueRef num_loop,
726 LLVMBuilderRef builder,
727 struct lp_type type,
728 LLVMValueRef a0_ptr,
729 LLVMValueRef dadx_ptr,
730 LLVMValueRef dady_ptr,
731 LLVMValueRef x0,
732 LLVMValueRef y0)
733 {
734 struct lp_type coeff_type;
735 struct lp_type setup_type;
736 unsigned attrib;
737 unsigned chan;
738
739 memset(bld, 0, sizeof *bld);
740
741 memset(&coeff_type, 0, sizeof coeff_type);
742 coeff_type.floating = true;
743 coeff_type.sign = true;
744 coeff_type.width = 32;
745 coeff_type.length = type.length;
746
747 memset(&setup_type, 0, sizeof setup_type);
748 setup_type.floating = true;
749 setup_type.sign = true;
750 setup_type.width = 32;
751 setup_type.length = TGSI_NUM_CHANNELS;
752
753
754 /* XXX: we don't support interpolating into any other types */
755 assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
756
757 lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
758 lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
759
760 /* For convenience */
761 bld->pos = bld->attribs[0];
762 bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
763
764 /* Position */
765 bld->mask[0] = TGSI_WRITEMASK_XYZW;
766 bld->interp[0] = LP_INTERP_LINEAR;
767 bld->interp_loc[0] = 0;
768
769 /* Inputs */
770 for (attrib = 0; attrib < num_inputs; ++attrib) {
771 bld->mask[1 + attrib] = inputs[attrib].usage_mask;
772 bld->interp[1 + attrib] = inputs[attrib].interp;
773 bld->interp_loc[1 + attrib] = inputs[attrib].location;
774 }
775 bld->num_attribs = 1 + num_inputs;
776
777 /* needed for indirect */
778 bld->a0_ptr = a0_ptr;
779 bld->dadx_ptr = dadx_ptr;
780 bld->dady_ptr = dady_ptr;
781
782 /* Ensure all masked out input channels have a valid value */
783 for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
784 for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
785 bld->attribs[attrib][chan] = bld->coeff_bld.undef;
786 }
787 }
788
789 if (pixel_center_integer) {
790 bld->pos_offset = 0.0;
791 } else {
792 bld->pos_offset = 0.5;
793 }
794 bld->coverage_samples = coverage_samples;
795 bld->num_loop = num_loop;
796 bld->sample_pos_array_type = sample_pos_array_type;
797 bld->sample_pos_array = sample_pos_array;
798
799 pos_init(bld, x0, y0);
800
801 /*
802 * Simple method (single step interpolation) may be slower if vector length
803 * is just 4, but the results are different (generally less accurate) with
804 * the other method, so always use more accurate version.
805 */
806 {
807 /* XXX this should use a global static table */
808 unsigned i;
809 unsigned num_loops = 16 / type.length;
810 LLVMValueRef pixoffx, pixoffy, index;
811 LLVMValueRef ptr;
812
813 bld->store_elem_type = lp_build_vec_type(gallivm, type);
814 bld->xoffset_store =
815 lp_build_array_alloca(gallivm, bld->store_elem_type,
816 lp_build_const_int32(gallivm, num_loops), "");
817 bld->yoffset_store =
818 lp_build_array_alloca(gallivm, bld->store_elem_type,
819 lp_build_const_int32(gallivm, num_loops), "");
820 for (i = 0; i < num_loops; i++) {
821 index = lp_build_const_int32(gallivm, i);
822 calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
823 ptr = LLVMBuildGEP2(builder, bld->store_elem_type,
824 bld->xoffset_store, &index, 1, "");
825 LLVMBuildStore(builder, pixoffx, ptr);
826 ptr = LLVMBuildGEP2(builder, bld->store_elem_type,
827 bld->yoffset_store, &index, 1, "");
828 LLVMBuildStore(builder, pixoffy, ptr);
829 }
830 }
831 coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
832 }
833
834
835 /*
836 * Advance the position and inputs to the given quad within the block.
837 */
838
839 void
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMTypeRef mask_type,LLVMValueRef mask_store,LLVMValueRef sample_id)840 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
841 struct gallivm_state *gallivm,
842 LLVMValueRef quad_start_index,
843 LLVMTypeRef mask_type,
844 LLVMValueRef mask_store,
845 LLVMValueRef sample_id)
846 {
847 attribs_update_simple(bld, gallivm, quad_start_index, mask_type,
848 mask_store, sample_id, 1, bld->num_attribs);
849 }
850
851
852 void
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef sample_id)853 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
854 struct gallivm_state *gallivm,
855 LLVMValueRef quad_start_index,
856 LLVMValueRef sample_id)
857 {
858 attribs_update_simple(bld, gallivm, quad_start_index,
859 NULL, NULL, sample_id, 0, 1);
860 }
861
862