xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/llvmpipe/lp_bld_interp.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2009 VMware, Inc.
4  * Copyright 2007-2008 VMware, Inc.
5  * All Rights Reserved.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the
9  * "Software"), to deal in the Software without restriction, including
10  * without limitation the rights to use, copy, modify, merge, publish,
11  * distribute, sub license, and/or sell copies of the Software, and to
12  * permit persons to whom the Software is furnished to do so, subject to
13  * the following conditions:
14  *
15  * The above copyright notice and this permission notice (including the
16  * next paragraph) shall be included in all copies or substantial portions
17  * of the Software.
18  *
19  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26  *
27  **************************************************************************/
28 
29 /**
30  * @file
31  * Position and shader input interpolation.
32  *
33  * @author Jose Fonseca <[email protected]>
34  */
35 
36 #include "pipe/p_shader_tokens.h"
37 #include "util/compiler.h"
38 #include "util/u_debug.h"
39 #include "util/u_memory.h"
40 #include "util/u_math.h"
41 #include "gallivm/lp_bld_debug.h"
42 #include "gallivm/lp_bld_const.h"
43 #include "gallivm/lp_bld_arit.h"
44 #include "gallivm/lp_bld_swizzle.h"
45 #include "gallivm/lp_bld_flow.h"
46 #include "gallivm/lp_bld_logic.h"
47 #include "gallivm/lp_bld_struct.h"
48 #include "gallivm/lp_bld_gather.h"
49 #include "lp_bld_interp.h"
50 
51 
52 /*
53  * The shader JIT function operates on blocks of quads.
54  * Each block has 2x2 quads and each quad has 2x2 pixels.
55  *
56  * We iterate over the quads in order 0, 1, 2, 3:
57  *
58  * #################
59  * #   |   #   |   #
60  * #---0---#---1---#
61  * #   |   #   |   #
62  * #################
63  * #   |   #   |   #
64  * #---2---#---3---#
65  * #   |   #   |   #
66  * #################
67  *
68  * If we iterate over multiple quads at once, quads 01 and 23 are processed
69  * together.
70  *
71  * Within each quad, we have four pixels which are represented in SOA
72  * order:
73  *
74  * #########
75  * # 0 | 1 #
76  * #---+---#
77  * # 2 | 3 #
78  * #########
79  *
80  * So the green channel (for example) of the four pixels is stored in
81  * a single vector register: {g0, g1, g2, g3}.
82  * The order stays the same even with multiple quads:
83  * 0 1 4 5
84  * 2 3 6 7
85  * is stored as g0..g7
86  */
87 
88 
89 /**
90  * Do one perspective divide per quad.
91  *
92  * For perspective interpolation, the final attribute value is given
93  *
94  *  a' = a/w = a * oow
95  *
96  * where
97  *
98  *  a = a0 + dadx*x + dady*y
99  *  w = w0 + dwdx*x + dwdy*y
100  *  oow = 1/w = 1/(w0 + dwdx*x + dwdy*y)
101  *
102  * Instead of computing the division per pixel, with this macro we compute the
103  * division on the upper left pixel of each quad, and use a linear
104  * approximation in the remaining pixels, given by:
105  *
106  *  da'dx = (dadx - dwdx*a)*oow
107  *  da'dy = (dady - dwdy*a)*oow
108  *
109  * Ironically, this actually makes things slower -- probably because the
110  * divide hardware unit is rarely used, whereas the multiply unit is typically
111  * already saturated.
112  */
#define PERSPECTIVE_DIVIDE_PER_QUAD 0


/*
 * Pixel x/y offsets inside a 4x4 pixel block, indexed by vector lane.
 *
 * Each row below holds the four lanes of one 2x2 quad; within a quad the
 * lanes go left-to-right, then top-to-bottom.
 */
static const unsigned char quad_offset_x[16] = {
   0, 1, 0, 1,
   2, 3, 2, 3,
   0, 1, 0, 1,
   2, 3, 2, 3,
};
static const unsigned char quad_offset_y[16] = {
   0, 0, 1, 1,
   0, 0, 1, 1,
   2, 2, 3, 3,
   2, 2, 3, 3,
};
118 
119 
120 static void
attrib_name(LLVMValueRef val,unsigned attrib,unsigned chan,const char * suffix)121 attrib_name(LLVMValueRef val, unsigned attrib, unsigned chan, const char *suffix)
122 {
123    if (attrib == 0)
124       lp_build_name(val, "pos.%c%s", "xyzw"[chan], suffix);
125    else
126       lp_build_name(val, "input%u.%c%s", attrib - 1, "xyzw"[chan], suffix);
127 }
128 
129 
130 static void
calc_offsets(struct lp_build_context * coeff_bld,unsigned quad_start_index,LLVMValueRef * pixoffx,LLVMValueRef * pixoffy)131 calc_offsets(struct lp_build_context *coeff_bld,
132              unsigned quad_start_index,
133              LLVMValueRef *pixoffx,
134              LLVMValueRef *pixoffy)
135 {
136    unsigned num_pix = coeff_bld->type.length;
137    struct gallivm_state *gallivm = coeff_bld->gallivm;
138    LLVMBuilderRef builder = coeff_bld->gallivm->builder;
139    LLVMValueRef nr, pixxf, pixyf;
140 
141    *pixoffx = coeff_bld->undef;
142    *pixoffy = coeff_bld->undef;
143 
144    for (unsigned i = 0; i < num_pix; i++) {
145       nr = lp_build_const_int32(gallivm, i);
146       pixxf = lp_build_const_float(gallivm, quad_offset_x[i % num_pix] +
147                                    (quad_start_index & 1) * 2);
148       pixyf = lp_build_const_float(gallivm, quad_offset_y[i % num_pix] +
149                                    (quad_start_index & 2));
150       *pixoffx = LLVMBuildInsertElement(builder, *pixoffx, pixxf, nr, "");
151       *pixoffy = LLVMBuildInsertElement(builder, *pixoffy, pixyf, nr, "");
152    }
153 }
154 
155 
156 static void
calc_centroid_offsets(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMTypeRef mask_type,LLVMValueRef mask_store,LLVMValueRef pix_center_offset,LLVMValueRef * centroid_x,LLVMValueRef * centroid_y)157 calc_centroid_offsets(struct lp_build_interp_soa_context *bld,
158                       struct gallivm_state *gallivm,
159                       LLVMValueRef loop_iter,
160                       LLVMTypeRef mask_type,
161                       LLVMValueRef mask_store,
162                       LLVMValueRef pix_center_offset,
163                       LLVMValueRef *centroid_x, LLVMValueRef *centroid_y)
164 {
165    struct lp_build_context *coeff_bld = &bld->coeff_bld;
166    LLVMBuilderRef builder = gallivm->builder;
167    LLVMValueRef s_mask_and = NULL;
168    LLVMValueRef centroid_x_offset = pix_center_offset;
169    LLVMValueRef centroid_y_offset = pix_center_offset;
170    for (int s = bld->coverage_samples - 1; s >= 0; s--) {
171       LLVMValueRef sample_cov;
172       LLVMValueRef s_mask_idx = LLVMBuildMul(builder, bld->num_loop, lp_build_const_int32(gallivm, s), "");
173 
174       s_mask_idx = LLVMBuildAdd(builder, s_mask_idx, loop_iter, "");
175       sample_cov = lp_build_pointer_get2(builder, mask_type, mask_store, s_mask_idx);
176       if (s == bld->coverage_samples - 1)
177          s_mask_and = sample_cov;
178       else
179          s_mask_and = LLVMBuildAnd(builder, s_mask_and, sample_cov, "");
180 
181       LLVMValueRef x_val_idx = lp_build_const_int32(gallivm, s * 2);
182       LLVMValueRef y_val_idx = lp_build_const_int32(gallivm, s * 2 + 1);
183 
184       x_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
185                                       bld->sample_pos_array, x_val_idx);
186       y_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
187                                       bld->sample_pos_array, y_val_idx);
188       x_val_idx = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
189       y_val_idx = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
190       centroid_x_offset = lp_build_select(coeff_bld, sample_cov, x_val_idx, centroid_x_offset);
191       centroid_y_offset = lp_build_select(coeff_bld, sample_cov, y_val_idx, centroid_y_offset);
192    }
193    *centroid_x = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_x_offset);
194    *centroid_y = lp_build_select(coeff_bld, s_mask_and, pix_center_offset, centroid_y_offset);
195 }
196 
197 
198 /* Note: this assumes the pointer to elem_type is in address space 0 */
199 static LLVMValueRef
load_casted(LLVMBuilderRef builder,LLVMTypeRef elem_type,LLVMValueRef ptr,const char * name)200 load_casted(LLVMBuilderRef builder, LLVMTypeRef elem_type,
201             LLVMValueRef ptr, const char *name) {
202    ptr = LLVMBuildBitCast(builder, ptr, LLVMPointerType(elem_type, 0), name);
203    return LLVMBuildLoad2(builder, elem_type, ptr, name);
204 }
205 
206 
207 static LLVMValueRef
indexed_load(LLVMBuilderRef builder,LLVMTypeRef gep_type,LLVMTypeRef elem_type,LLVMValueRef ptr,LLVMValueRef index,const char * name)208 indexed_load(LLVMBuilderRef builder, LLVMTypeRef gep_type,
209                   LLVMTypeRef elem_type, LLVMValueRef ptr,
210              LLVMValueRef index, const char *name) {
211    ptr = LLVMBuildGEP2(builder, gep_type, ptr, &index, 1, name);
212    return load_casted(builder, elem_type, ptr, name);
213 }
214 
215 
216 /* Much easier, and significantly less instructions in the per-stamp
217  * part (less than half) but overall more instructions so a loss if
218  * most quads are active. Might be a win though with larger vectors.
219  * No ability to do per-quad divide (doable but not implemented)
220  * Could be made to work with passed in pixel offsets (i.e. active quad
221  * merging).
222  */
223 static void
coeffs_init_simple(struct lp_build_interp_soa_context * bld,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr)224 coeffs_init_simple(struct lp_build_interp_soa_context *bld,
225                    LLVMValueRef a0_ptr,
226                    LLVMValueRef dadx_ptr,
227                    LLVMValueRef dady_ptr)
228 {
229    struct lp_build_context *coeff_bld = &bld->coeff_bld;
230    struct lp_build_context *setup_bld = &bld->setup_bld;
231    struct gallivm_state *gallivm = coeff_bld->gallivm;
232    LLVMBuilderRef builder = gallivm->builder;
233 
234    for (unsigned attrib = 0; attrib < bld->num_attribs; ++attrib) {
235       /*
236        * always fetch all 4 values for performance/simplicity
237        * Note: we do that here because it seems to generate better
238        * code. It generates a lot of moves initially but less
239        * moves later. As far as I can tell this looks like a
240        * llvm issue, instead of simply reloading the values from
241        * the passed in pointers it if it runs out of registers
242        * it spills/reloads them. Maybe some optimization passes
243        * would help.
244        * Might want to investigate this again later.
245        */
246       const enum lp_interp interp = bld->interp[attrib];
247       LLVMValueRef index = lp_build_const_int32(gallivm,
248                                 attrib * TGSI_NUM_CHANNELS);
249       LLVMValueRef dadxaos = setup_bld->zero;
250       LLVMValueRef dadyaos = setup_bld->zero;
251       LLVMValueRef a0aos = setup_bld->zero;
252 
253       /* See: lp_state_fs.c / generate_fragment() / fs_elem_type */
254       LLVMTypeRef fs_elem_type = LLVMFloatTypeInContext(gallivm->context);
255 
256       switch (interp) {
257       case LP_INTERP_PERSPECTIVE:
258          FALLTHROUGH;
259 
260       case LP_INTERP_LINEAR:
261          dadxaos = indexed_load(builder, fs_elem_type,
262                                 setup_bld->vec_type, dadx_ptr, index, "");
263          dadyaos = indexed_load(builder, fs_elem_type,
264                                 setup_bld->vec_type, dady_ptr, index, "");
265          attrib_name(dadxaos, attrib, 0, ".dadxaos");
266          attrib_name(dadyaos, attrib, 0, ".dadyaos");
267          FALLTHROUGH;
268 
269       case LP_INTERP_CONSTANT:
270       case LP_INTERP_FACING:
271          a0aos = indexed_load(builder, fs_elem_type,
272                               setup_bld->vec_type, a0_ptr, index, "");
273          attrib_name(a0aos, attrib, 0, ".a0aos");
274          break;
275 
276       case LP_INTERP_POSITION:
277          /* Nothing to do as the position coeffs are already setup in slot 0 */
278          continue;
279 
280       default:
281          assert(0);
282          break;
283       }
284       bld->a0aos[attrib] = a0aos;
285       bld->dadxaos[attrib] = dadxaos;
286       bld->dadyaos[attrib] = dadyaos;
287    }
288 }
289 
290 
291 /**
292  * Interpolate the shader input attribute values.
293  * This is called for each (group of) quad(s).
294  */
295 static void
attribs_update_simple(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMTypeRef mask_type,LLVMValueRef mask_store,LLVMValueRef sample_id,int start,int end)296 attribs_update_simple(struct lp_build_interp_soa_context *bld,
297                       struct gallivm_state *gallivm,
298                       LLVMValueRef loop_iter,
299                       LLVMTypeRef mask_type,
300                       LLVMValueRef mask_store,
301                       LLVMValueRef sample_id,
302                       int start,
303                       int end)
304 {
305    LLVMBuilderRef builder = gallivm->builder;
306    struct lp_build_context *coeff_bld = &bld->coeff_bld;
307    struct lp_build_context *setup_bld = &bld->setup_bld;
308    LLVMValueRef oow = NULL;
309    LLVMValueRef pixoffx;
310    LLVMValueRef pixoffy;
311    LLVMValueRef ptr;
312    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm,
313                                                        coeff_bld->type, 0.5);
314 
315    /* could do this with code-generated passed in pixel offsets too */
316 
317    assert(loop_iter);
318    ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store,
319                        &loop_iter, 1, "");
320    pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
321    ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store,
322                        &loop_iter, 1, "");
323    pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
324 
325    pixoffx = LLVMBuildFAdd(builder, pixoffx,
326                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
327    pixoffy = LLVMBuildFAdd(builder, pixoffy,
328                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
329 
330    for (unsigned attrib = start; attrib < end; attrib++) {
331       const unsigned mask = bld->mask[attrib];
332       const enum lp_interp interp = bld->interp[attrib];
333       const enum tgsi_interpolate_loc loc = bld->interp_loc[attrib];
334 
335       for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
336          if (mask & (1 << chan)) {
337             LLVMValueRef index = lp_build_const_int32(gallivm, chan);
338             LLVMValueRef dadx = coeff_bld->zero;
339             LLVMValueRef dady = coeff_bld->zero;
340             LLVMValueRef a = coeff_bld->zero;
341             LLVMValueRef chan_pixoffx = pixoffx, chan_pixoffy = pixoffy;
342 
343             switch (interp) {
344             case LP_INTERP_PERSPECTIVE:
345                FALLTHROUGH;
346 
347             case LP_INTERP_LINEAR:
348                if (attrib == 0 && chan == 0) {
349                   dadx = coeff_bld->one;
350                   if (sample_id) {
351                      LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
352                      x_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
353                                                      bld->sample_pos_array, x_val_idx);
354                      a = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
355                   } else {
356                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
357                   }
358                }
359                else if (attrib == 0 && chan == 1) {
360                   dady = coeff_bld->one;
361                   if (sample_id) {
362                      LLVMValueRef y_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
363                      y_val_idx = LLVMBuildAdd(gallivm->builder, y_val_idx, lp_build_const_int32(gallivm, 1), "");
364                      y_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
365                                                      bld->sample_pos_array, y_val_idx);
366                      a = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
367                   } else {
368                      a = lp_build_const_vec(gallivm, coeff_bld->type, bld->pos_offset);
369                   }
370                }
371                else {
372                   dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
373                                                     coeff_bld->type, bld->dadxaos[attrib],
374                                                     index);
375                   dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
376                                                     coeff_bld->type, bld->dadyaos[attrib],
377                                                     index);
378                   a = lp_build_extract_broadcast(gallivm, setup_bld->type,
379                                                  coeff_bld->type, bld->a0aos[attrib],
380                                                  index);
381 
382                   if (bld->coverage_samples > 1) {
383                      LLVMValueRef xoffset = pix_center_offset;
384                      LLVMValueRef yoffset = pix_center_offset;
385                      if (loc == TGSI_INTERPOLATE_LOC_SAMPLE ||
386                          (attrib == 0 && chan == 2 && sample_id)) {
387                         LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, sample_id, lp_build_const_int32(gallivm, 2), "");
388                         LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx, lp_build_const_int32(gallivm, 1), "");
389 
390                         x_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
391                                                        bld->sample_pos_array, x_val_idx);
392                         y_val_idx = lp_build_array_get2(gallivm, bld->sample_pos_array_type,
393                                                         bld->sample_pos_array, y_val_idx);
394                         xoffset = lp_build_broadcast_scalar(coeff_bld, x_val_idx);
395                         yoffset = lp_build_broadcast_scalar(coeff_bld, y_val_idx);
396                      } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
397                         calc_centroid_offsets(bld, gallivm, loop_iter, mask_type, mask_store,
398                                               pix_center_offset, &xoffset, &yoffset);
399                      }
400                      chan_pixoffx = lp_build_add(coeff_bld, chan_pixoffx, xoffset);
401                      chan_pixoffy = lp_build_add(coeff_bld, chan_pixoffy, yoffset);
402                   }
403                }
404 
405                /*
406                 * a = a0 + (x * dadx + y * dady)
407                 */
408                a = lp_build_fmuladd(builder, dadx, chan_pixoffx, a);
409                a = lp_build_fmuladd(builder, dady, chan_pixoffy, a);
410 
411                if (interp == LP_INTERP_PERSPECTIVE) {
412                   if (oow == NULL) {
413                      LLVMValueRef w = bld->attribs[0][3];
414                      assert(attrib != 0);
415                      assert(bld->mask[0] & TGSI_WRITEMASK_W);
416                      oow = lp_build_rcp(coeff_bld, w);
417                   }
418                   a = lp_build_mul(coeff_bld, a, oow);
419                }
420                break;
421 
422             case LP_INTERP_CONSTANT:
423             case LP_INTERP_FACING:
424                a = lp_build_extract_broadcast(gallivm, setup_bld->type,
425                                               coeff_bld->type, bld->a0aos[attrib],
426                                               index);
427                break;
428 
429             case LP_INTERP_POSITION:
430                assert(attrib > 0);
431                a = bld->attribs[0][chan];
432                break;
433 
434             default:
435                assert(0);
436                break;
437             }
438 
439             if ((attrib == 0) && (chan == 2)) {
440                /* add polygon-offset value, stored in the X component of a0 */
441                LLVMValueRef offset =
442                   lp_build_extract_broadcast(gallivm, setup_bld->type,
443                                              coeff_bld->type, bld->a0aos[0],
444                                              lp_build_const_int32(gallivm, 0));
445                a = LLVMBuildFAdd(builder, a, offset, "");
446             }
447 
448             bld->attribs[attrib][chan] = a;
449          }
450       }
451    }
452 }
453 
454 
455 static LLVMValueRef
lp_build_interp_soa_indirect(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned attrib,unsigned chan,LLVMValueRef indir_index,LLVMValueRef pixoffx,LLVMValueRef pixoffy)456 lp_build_interp_soa_indirect(struct lp_build_interp_soa_context *bld,
457                              struct gallivm_state *gallivm,
458                              unsigned attrib, unsigned chan,
459                              LLVMValueRef indir_index,
460                              LLVMValueRef pixoffx,
461                              LLVMValueRef pixoffy)
462 {
463    LLVMBuilderRef builder = gallivm->builder;
464    struct lp_build_context *coeff_bld = &bld->coeff_bld;
465    const enum lp_interp interp = bld->interp[attrib];
466    LLVMValueRef dadx = coeff_bld->zero;
467    LLVMValueRef dady = coeff_bld->zero;
468    LLVMValueRef a = coeff_bld->zero;
469    LLVMTypeRef u8ptr =
470       LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0);
471 
472    indir_index = LLVMBuildAdd(builder, indir_index,
473                               lp_build_const_int_vec(gallivm, coeff_bld->type,
474                                                      attrib), "");
475    LLVMValueRef index = LLVMBuildMul(builder, indir_index,
476                                      lp_build_const_int_vec(gallivm,
477                                                             coeff_bld->type,
478                                                             4), "");
479    index = LLVMBuildAdd(builder, index,
480                         lp_build_const_int_vec(gallivm,
481                                                coeff_bld->type, chan), "");
482 
483    /* size up to byte indices */
484    index = LLVMBuildMul(builder, index,
485                         lp_build_const_int_vec(gallivm, coeff_bld->type, 4),
486                         "");
487 
488    struct lp_type dst_type = coeff_bld->type;
489    dst_type.length = 1;
490    switch (interp) {
491    case LP_INTERP_PERSPECTIVE:
492       FALLTHROUGH;
493    case LP_INTERP_LINEAR:
494       dadx = lp_build_gather(gallivm, coeff_bld->type.length,
495                              coeff_bld->type.width, dst_type,
496                              true, LLVMBuildBitCast(builder, bld->dadx_ptr,
497                                                     u8ptr, ""),
498                              index, false);
499 
500       dady = lp_build_gather(gallivm, coeff_bld->type.length,
501                              coeff_bld->type.width, dst_type,
502                              true, LLVMBuildBitCast(builder, bld->dady_ptr,
503                                                     u8ptr, ""),
504                              index, false);
505 
506       a = lp_build_gather(gallivm, coeff_bld->type.length,
507                           coeff_bld->type.width, dst_type,
508                           true, LLVMBuildBitCast(builder, bld->a0_ptr,
509                                                  u8ptr, ""),
510                           index, false);
511 
512       /*
513        * a = a0 + (x * dadx + y * dady)
514        */
515       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
516       a = lp_build_fmuladd(builder, dady, pixoffy, a);
517 
518       if (interp == LP_INTERP_PERSPECTIVE) {
519         LLVMValueRef w = bld->attribs[0][3];
520         assert(attrib != 0);
521         assert(bld->mask[0] & TGSI_WRITEMASK_W);
522         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
523         a = lp_build_mul(coeff_bld, a, oow);
524       }
525 
526       break;
527    case LP_INTERP_CONSTANT:
528    case LP_INTERP_FACING:
529       a = lp_build_gather(gallivm, coeff_bld->type.length,
530                           coeff_bld->type.width, dst_type,
531                           true, LLVMBuildBitCast(builder, bld->a0_ptr,
532                                                  u8ptr, ""),
533                           index, false);
534       break;
535    default:
536       assert(0);
537       break;
538    }
539    return a;
540 }
541 
542 
543 LLVMValueRef
lp_build_interp_soa(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef loop_iter,LLVMTypeRef mask_type,LLVMValueRef mask_store,unsigned attrib,unsigned chan,enum tgsi_interpolate_loc loc,LLVMValueRef indir_index,LLVMValueRef offsets[2])544 lp_build_interp_soa(struct lp_build_interp_soa_context *bld,
545                     struct gallivm_state *gallivm,
546                     LLVMValueRef loop_iter,
547                     LLVMTypeRef mask_type,
548                     LLVMValueRef mask_store,
549                     unsigned attrib, unsigned chan,
550                     enum tgsi_interpolate_loc loc,
551                     LLVMValueRef indir_index,
552                     LLVMValueRef offsets[2])
553 {
554    LLVMBuilderRef builder = gallivm->builder;
555    struct lp_build_context *coeff_bld = &bld->coeff_bld;
556    struct lp_build_context *setup_bld = &bld->setup_bld;
557    LLVMValueRef pixoffx;
558    LLVMValueRef pixoffy;
559    LLVMValueRef ptr;
560 
561    /* could do this with code-generated passed in pixel offsets too */
562 
563    assert(loop_iter);
564    ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->xoffset_store,
565                        &loop_iter, 1, "");
566    pixoffx = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
567    ptr = LLVMBuildGEP2(builder, bld->store_elem_type, bld->yoffset_store,
568                        &loop_iter, 1, "");
569    pixoffy = LLVMBuildLoad2(builder, bld->store_elem_type, ptr, "");
570 
571    pixoffx = LLVMBuildFAdd(builder, pixoffx,
572                            lp_build_broadcast_scalar(coeff_bld, bld->x), "");
573    pixoffy = LLVMBuildFAdd(builder, pixoffy,
574                            lp_build_broadcast_scalar(coeff_bld, bld->y), "");
575 
576    LLVMValueRef pix_center_offset = lp_build_const_vec(gallivm,
577                                                        coeff_bld->type, 0.5);
578 
579    if (loc == TGSI_INTERPOLATE_LOC_CENTER) {
580       if (bld->coverage_samples > 1) {
581          pixoffx = LLVMBuildFAdd(builder, pixoffx, pix_center_offset, "");
582          pixoffy = LLVMBuildFAdd(builder, pixoffy, pix_center_offset, "");
583       }
584 
585       if (offsets[0])
586          pixoffx = LLVMBuildFAdd(builder, pixoffx,
587                                  offsets[0], "");
588       if (offsets[1])
589          pixoffy = LLVMBuildFAdd(builder, pixoffy,
590                                  offsets[1], "");
591    } else if (loc == TGSI_INTERPOLATE_LOC_SAMPLE) {
592       LLVMValueRef x_val_idx = LLVMBuildMul(gallivm->builder, offsets[0],
593          lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 2 * 4), "");
594       LLVMValueRef y_val_idx = LLVMBuildAdd(gallivm->builder, x_val_idx,
595          lp_build_const_int_vec(gallivm, bld->coeff_bld.type, 4), "");
596 
597       LLVMValueRef base_ptr =
598          LLVMBuildBitCast(gallivm->builder,
599                           bld->sample_pos_array,
600                           LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
601       LLVMValueRef xoffset = lp_build_gather(gallivm,
602                                              bld->coeff_bld.type.length,
603                                              bld->coeff_bld.type.width,
604                                              lp_elem_type(bld->coeff_bld.type),
605                                              false,
606                                              base_ptr,
607                                              x_val_idx, true);
608       LLVMValueRef yoffset = lp_build_gather(gallivm,
609                                              bld->coeff_bld.type.length,
610                                              bld->coeff_bld.type.width,
611                                              lp_elem_type(bld->coeff_bld.type),
612                                              false,
613                                              base_ptr,
614                                              y_val_idx, true);
615 
616       if (bld->coverage_samples > 1) {
617          pixoffx = LLVMBuildFAdd(builder, pixoffx, xoffset, "");
618          pixoffy = LLVMBuildFAdd(builder, pixoffy, yoffset, "");
619       }
620    } else if (loc == TGSI_INTERPOLATE_LOC_CENTROID) {
621       LLVMValueRef centroid_x_offset, centroid_y_offset;
622 
623       /* for centroid find covered samples for this quad. */
624       /* if all samples are covered use pixel centers */
625       if (bld->coverage_samples > 1) {
626          calc_centroid_offsets(bld, gallivm, loop_iter, mask_type, mask_store,
627                                pix_center_offset, &centroid_x_offset,
628                                &centroid_y_offset);
629 
630          pixoffx = LLVMBuildFAdd(builder, pixoffx, centroid_x_offset, "");
631          pixoffy = LLVMBuildFAdd(builder, pixoffy, centroid_y_offset, "");
632       }
633    }
634 
635    // remap attrib properly.
636    attrib++;
637 
638    if (indir_index)
639      return lp_build_interp_soa_indirect(bld, gallivm, attrib, chan,
640                                          indir_index, pixoffx, pixoffy);
641 
642 
643    const enum lp_interp interp = bld->interp[attrib];
644    LLVMValueRef dadx = coeff_bld->zero;
645    LLVMValueRef dady = coeff_bld->zero;
646    LLVMValueRef a = coeff_bld->zero;
647 
648    LLVMValueRef index = lp_build_const_int32(gallivm, chan);
649 
650    switch (interp) {
651    case LP_INTERP_PERSPECTIVE:
652       FALLTHROUGH;
653    case LP_INTERP_LINEAR:
654       dadx = lp_build_extract_broadcast(gallivm, setup_bld->type,
655                                         coeff_bld->type, bld->dadxaos[attrib],
656                                         index);
657 
658       dady = lp_build_extract_broadcast(gallivm, setup_bld->type,
659                                         coeff_bld->type, bld->dadyaos[attrib],
660                                         index);
661 
662       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
663                                      coeff_bld->type, bld->a0aos[attrib],
664                                      index);
665 
666       /*
667        * a = a0 + (x * dadx + y * dady)
668        */
669       a = lp_build_fmuladd(builder, dadx, pixoffx, a);
670       a = lp_build_fmuladd(builder, dady, pixoffy, a);
671 
672       if (interp == LP_INTERP_PERSPECTIVE) {
673         LLVMValueRef w = bld->attribs[0][3];
674         assert(attrib != 0);
675         assert(bld->mask[0] & TGSI_WRITEMASK_W);
676         LLVMValueRef oow = lp_build_rcp(coeff_bld, w);
677         a = lp_build_mul(coeff_bld, a, oow);
678       }
679 
680       break;
681    case LP_INTERP_CONSTANT:
682    case LP_INTERP_FACING:
683       a = lp_build_extract_broadcast(gallivm, setup_bld->type,
684                                      coeff_bld->type, bld->a0aos[attrib],
685                                      index);
686       break;
687    default:
688       assert(0);
689       break;
690    }
691    return a;
692 }
693 
694 
695 /**
696  * Generate the position vectors.
697  *
698  * Parameter x0, y0 are the integer values with upper left coordinates.
699  */
700 static void
pos_init(struct lp_build_interp_soa_context * bld,LLVMValueRef x0,LLVMValueRef y0)701 pos_init(struct lp_build_interp_soa_context *bld,
702          LLVMValueRef x0,
703          LLVMValueRef y0)
704 {
705    LLVMBuilderRef builder = bld->coeff_bld.gallivm->builder;
706    struct lp_build_context *coeff_bld = &bld->coeff_bld;
707 
708    bld->x = LLVMBuildSIToFP(builder, x0, coeff_bld->elem_type, "");
709    bld->y = LLVMBuildSIToFP(builder, y0, coeff_bld->elem_type, "");
710 }
711 
712 
713 /**
714  * Initialize fragment shader input attribute info.
715  */
716 void
lp_build_interp_soa_init(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,unsigned num_inputs,const struct lp_shader_input * inputs,bool pixel_center_integer,unsigned coverage_samples,LLVMTypeRef sample_pos_array_type,LLVMValueRef sample_pos_array,LLVMValueRef num_loop,LLVMBuilderRef builder,struct lp_type type,LLVMValueRef a0_ptr,LLVMValueRef dadx_ptr,LLVMValueRef dady_ptr,LLVMValueRef x0,LLVMValueRef y0)717 lp_build_interp_soa_init(struct lp_build_interp_soa_context *bld,
718                          struct gallivm_state *gallivm,
719                          unsigned num_inputs,
720                          const struct lp_shader_input *inputs,
721                          bool pixel_center_integer,
722                          unsigned coverage_samples,
723                          LLVMTypeRef sample_pos_array_type,
724                          LLVMValueRef sample_pos_array,
725                          LLVMValueRef num_loop,
726                          LLVMBuilderRef builder,
727                          struct lp_type type,
728                          LLVMValueRef a0_ptr,
729                          LLVMValueRef dadx_ptr,
730                          LLVMValueRef dady_ptr,
731                          LLVMValueRef x0,
732                          LLVMValueRef y0)
733 {
734    struct lp_type coeff_type;
735    struct lp_type setup_type;
736    unsigned attrib;
737    unsigned chan;
738 
739    memset(bld, 0, sizeof *bld);
740 
741    memset(&coeff_type, 0, sizeof coeff_type);
742    coeff_type.floating = true;
743    coeff_type.sign = true;
744    coeff_type.width = 32;
745    coeff_type.length = type.length;
746 
747    memset(&setup_type, 0, sizeof setup_type);
748    setup_type.floating = true;
749    setup_type.sign = true;
750    setup_type.width = 32;
751    setup_type.length = TGSI_NUM_CHANNELS;
752 
753 
754    /* XXX: we don't support interpolating into any other types */
755    assert(memcmp(&coeff_type, &type, sizeof coeff_type) == 0);
756 
757    lp_build_context_init(&bld->coeff_bld, gallivm, coeff_type);
758    lp_build_context_init(&bld->setup_bld, gallivm, setup_type);
759 
760    /* For convenience */
761    bld->pos = bld->attribs[0];
762    bld->inputs = (const LLVMValueRef (*)[TGSI_NUM_CHANNELS]) bld->attribs[1];
763 
764    /* Position */
765    bld->mask[0] = TGSI_WRITEMASK_XYZW;
766    bld->interp[0] = LP_INTERP_LINEAR;
767    bld->interp_loc[0] = 0;
768 
769    /* Inputs */
770    for (attrib = 0; attrib < num_inputs; ++attrib) {
771       bld->mask[1 + attrib] = inputs[attrib].usage_mask;
772       bld->interp[1 + attrib] = inputs[attrib].interp;
773       bld->interp_loc[1 + attrib] = inputs[attrib].location;
774    }
775    bld->num_attribs = 1 + num_inputs;
776 
777    /* needed for indirect */
778    bld->a0_ptr = a0_ptr;
779    bld->dadx_ptr = dadx_ptr;
780    bld->dady_ptr = dady_ptr;
781 
782    /* Ensure all masked out input channels have a valid value */
783    for (attrib = 0; attrib < bld->num_attribs; ++attrib) {
784       for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
785          bld->attribs[attrib][chan] = bld->coeff_bld.undef;
786       }
787    }
788 
789    if (pixel_center_integer) {
790       bld->pos_offset = 0.0;
791    } else {
792       bld->pos_offset = 0.5;
793    }
794    bld->coverage_samples = coverage_samples;
795    bld->num_loop = num_loop;
796    bld->sample_pos_array_type = sample_pos_array_type;
797    bld->sample_pos_array = sample_pos_array;
798 
799    pos_init(bld, x0, y0);
800 
801    /*
802     * Simple method (single step interpolation) may be slower if vector length
803     * is just 4, but the results are different (generally less accurate) with
804     * the other method, so always use more accurate version.
805     */
806    {
807       /* XXX this should use a global static table */
808       unsigned i;
809       unsigned num_loops = 16 / type.length;
810       LLVMValueRef pixoffx, pixoffy, index;
811       LLVMValueRef ptr;
812 
813       bld->store_elem_type = lp_build_vec_type(gallivm, type);
814       bld->xoffset_store =
815          lp_build_array_alloca(gallivm, bld->store_elem_type,
816                                lp_build_const_int32(gallivm, num_loops), "");
817       bld->yoffset_store =
818          lp_build_array_alloca(gallivm, bld->store_elem_type,
819                                lp_build_const_int32(gallivm, num_loops), "");
820       for (i = 0; i < num_loops; i++) {
821          index = lp_build_const_int32(gallivm, i);
822          calc_offsets(&bld->coeff_bld, i*type.length/4, &pixoffx, &pixoffy);
823          ptr = LLVMBuildGEP2(builder, bld->store_elem_type,
824                              bld->xoffset_store, &index, 1, "");
825          LLVMBuildStore(builder, pixoffx, ptr);
826          ptr = LLVMBuildGEP2(builder, bld->store_elem_type,
827                              bld->yoffset_store, &index, 1, "");
828          LLVMBuildStore(builder, pixoffy, ptr);
829       }
830    }
831    coeffs_init_simple(bld, a0_ptr, dadx_ptr, dady_ptr);
832 }
833 
834 
835 /*
836  * Advance the position and inputs to the given quad within the block.
837  */
838 
839 void
lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMTypeRef mask_type,LLVMValueRef mask_store,LLVMValueRef sample_id)840 lp_build_interp_soa_update_inputs_dyn(struct lp_build_interp_soa_context *bld,
841                                       struct gallivm_state *gallivm,
842                                       LLVMValueRef quad_start_index,
843                                       LLVMTypeRef mask_type,
844                                       LLVMValueRef mask_store,
845                                       LLVMValueRef sample_id)
846 {
847    attribs_update_simple(bld, gallivm, quad_start_index, mask_type,
848                          mask_store, sample_id, 1, bld->num_attribs);
849 }
850 
851 
852 void
lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context * bld,struct gallivm_state * gallivm,LLVMValueRef quad_start_index,LLVMValueRef sample_id)853 lp_build_interp_soa_update_pos_dyn(struct lp_build_interp_soa_context *bld,
854                                    struct gallivm_state *gallivm,
855                                    LLVMValueRef quad_start_index,
856                                    LLVMValueRef sample_id)
857 {
858    attribs_update_simple(bld, gallivm, quad_start_index,
859                          NULL, NULL, sample_id, 0, 1);
860 }
861 
862