1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- SoA.
31 *
32 * @author Jose Fonseca <[email protected]>
33 * @author Brian Paul <[email protected]>
34 */
35
36 #include "pipe/p_defines.h"
37 #include "pipe/p_state.h"
38 #include "pipe/p_shader_tokens.h"
39 #include "util/bitset.h"
40 #include "util/compiler.h"
41 #include "util/u_debug.h"
42 #include "util/u_dump.h"
43 #include "util/u_memory.h"
44 #include "util/u_math.h"
45 #include "util/format/u_format.h"
46 #include "util/u_cpu_detect.h"
47 #include "util/format_rgb9e5.h"
48 #include "lp_bld_debug.h"
49 #include "lp_bld_type.h"
50 #include "lp_bld_const.h"
51 #include "lp_bld_conv.h"
52 #include "lp_bld_arit.h"
53 #include "lp_bld_bitarit.h"
54 #include "lp_bld_logic.h"
55 #include "lp_bld_printf.h"
56 #include "lp_bld_swizzle.h"
57 #include "lp_bld_flow.h"
58 #include "lp_bld_gather.h"
59 #include "lp_bld_format.h"
60 #include "lp_bld_sample.h"
61 #include "lp_bld_sample_aos.h"
62 #include "lp_bld_struct.h"
63 #include "lp_bld_quad.h"
64 #include "lp_bld_pack.h"
65 #include "lp_bld_intr.h"
66 #include "lp_bld_misc.h"
67 #include "lp_bld_jit_types.h"
68
/**
 * Check sparse-texture tile residency for a vector of byte offsets.
 *
 * Looks up one bit per 64KB tile in the residency bitset fetched via
 * dynamic_state->residency() and accumulates the per-lane result into
 * *out_resident (AND'ed with any previous residency result, so a sample
 * is considered resident only if every fetch so far was resident).
 *
 * \param bld           build context supplying the vector type
 * \param offset        per-lane byte offsets into the texture data
 * \param out_resident  in/out: i1 vector of residency flags (may point
 *                      to NULL on first call; then it is simply set)
 */
static void
lp_build_gather_resident(struct lp_build_context *bld,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         LLVMTypeRef resources_type,
                         LLVMValueRef resources_ptr,
                         LLVMValueRef offset,
                         LLVMValueRef *out_resident)
{
   struct lp_type type = lp_int_type(bld->type);

   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   /* The bitset is addressed below in 32-bit words. */
   static_assert(sizeof(BITSET_WORD) == 4, "Unexpected BITSET_WORD size");

   LLVMValueRef residency =
      dynamic_state->residency(gallivm, resources_type, resources_ptr, 0, NULL);

   /* One residency bit per 64KB tile: tile_index = offset >> log2(64K). */
   LLVMValueRef tile_size_log2 =
      lp_build_const_int_vec(gallivm, type, util_logbase2(64 * 1024));
   LLVMValueRef tile_index = LLVMBuildLShr(builder, offset, tile_size_log2, "");

   /* Which 32-bit word holds that bit: dword_index = tile_index >> 5. */
   LLVMValueRef dword_bitsize_log2 =
      lp_build_const_int_vec(gallivm, type, util_logbase2(32));
   LLVMValueRef dword_index = LLVMBuildLShr(builder, tile_index, dword_bitsize_log2, "");

   /* Byte offset of that word: dword_index * 4. */
   LLVMValueRef dword_size_log2 =
      lp_build_const_int_vec(gallivm, type, util_logbase2(4));
   LLVMValueRef dword_offset = LLVMBuildShl(builder, dword_index, dword_size_log2, "");

   /* Gather the per-lane residency words. */
   residency = lp_build_gather(gallivm, type.length, type.width, lp_elem_type(type),
                               true, residency, dword_offset, true);

   /* Bit position within the word: tile_index & 31. */
   LLVMValueRef dword_bit_mask =
      lp_build_const_int_vec(gallivm, type, 31);
   LLVMValueRef bit_index = LLVMBuildAnd(builder, tile_index, dword_bit_mask, "");
   LLVMValueRef bit_mask = LLVMBuildShl(builder, lp_build_one(gallivm, type), bit_index, "");

   LLVMValueRef resident = LLVMBuildAnd(builder, residency, bit_mask, "");
   resident = LLVMBuildICmp(builder, LLVMIntNE, resident, lp_build_zero(gallivm, type), "");

   /* Accumulate: resident only if all residency checks so far passed. */
   if (*out_resident)
      *out_resident = LLVMBuildAnd(builder, *out_resident, resident, "");
   else
      *out_resident = resident;
}
115
116 /**
117 * Generate code to fetch a texel from a texture at int coords (x, y, z).
118 * The computation depends on whether the texture is 1D, 2D or 3D.
119 * The result, texel, will be float vectors:
120 * texel[0] = red values
121 * texel[1] = green values
122 * texel[2] = blue values
123 * texel[3] = alpha values
124 */
125 static void
lp_build_sample_texel_soa(struct lp_build_sample_context * bld,LLVMValueRef width,LLVMValueRef height,LLVMValueRef depth,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef ilevel,LLVMValueRef texel_out[4])126 lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
127 LLVMValueRef width,
128 LLVMValueRef height,
129 LLVMValueRef depth,
130 LLVMValueRef x,
131 LLVMValueRef y,
132 LLVMValueRef z,
133 LLVMValueRef y_stride,
134 LLVMValueRef z_stride,
135 LLVMValueRef data_ptr,
136 LLVMValueRef mipoffsets,
137 LLVMValueRef ilevel,
138 LLVMValueRef texel_out[4])
139 {
140 const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
141 const unsigned dims = bld->dims;
142 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
143 LLVMBuilderRef builder = bld->gallivm->builder;
144 LLVMValueRef offset;
145 LLVMValueRef i, j;
146 LLVMValueRef use_border = NULL;
147
148 /* use_border = x < 0 || x >= width || y < 0 || y >= height */
149 if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
150 static_state->min_img_filter,
151 static_state->mag_img_filter)) {
152 LLVMValueRef b1, b2;
153 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
154 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
155 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
156 }
157
158 if (dims >= 2 &&
159 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
160 static_state->min_img_filter,
161 static_state->mag_img_filter)) {
162 LLVMValueRef b1, b2;
163 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
164 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
165 if (use_border) {
166 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
167 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
168 } else {
169 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
170 }
171 }
172
173 if (dims == 3 &&
174 lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
175 static_state->min_img_filter,
176 static_state->mag_img_filter)) {
177 LLVMValueRef b1, b2;
178 b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
179 b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
180 if (use_border) {
181 use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
182 use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
183 } else {
184 use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
185 }
186 }
187
188 /* convert x,y,z coords to linear offset from start of texture, in bytes */
189 if (bld->static_texture_state->tiled) {
190 lp_build_tiled_sample_offset(&bld->int_coord_bld,
191 bld->format_desc->format,
192 bld->static_texture_state,
193 x, y, z, width, height, z_stride,
194 &offset, &i, &j);
195 } else {
196 lp_build_sample_offset(&bld->int_coord_bld,
197 bld->format_desc,
198 x, y, z, y_stride, z_stride,
199 &offset, &i, &j);
200 }
201
202 if (mipoffsets) {
203 offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
204 }
205
206 if (use_border) {
207 /* If we can sample the border color, it means that texcoords may
208 * lie outside the bounds of the texture image. We need to do
209 * something to prevent reading out of bounds and causing a segfault.
210 *
211 * Simply AND the texture coords with !use_border. This will cause
212 * coords which are out of bounds to become zero. Zero's guaranteed
213 * to be inside the texture image.
214 */
215 offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
216 }
217
218 if (bld->residency) {
219 LLVMValueRef real_offset = offset;
220
221 if (!mipoffsets) {
222 mipoffsets = lp_build_get_mip_offsets(bld, ilevel);
223 real_offset = lp_build_add(&bld->int_coord_bld, real_offset, mipoffsets);
224
225 if (use_border)
226 real_offset = lp_build_andnot(&bld->int_coord_bld, real_offset, use_border);
227 }
228
229 lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
230 bld->resources_type, bld->resources_ptr,
231 real_offset, &bld->resident);
232 }
233
234 lp_build_fetch_rgba_soa(bld->gallivm,
235 bld->format_desc,
236 bld->texel_type, true,
237 data_ptr, offset,
238 i, j,
239 bld->cache,
240 texel_out);
241
242 /*
243 * Note: if we find an app which frequently samples the texture border
244 * we might want to implement a true conditional here to avoid sampling
245 * the texture whenever possible (since that's quite a bit of code).
246 * Ex:
247 * if (use_border) {
248 * texel = border_color;
249 * } else {
250 * texel = sample_texture(coord);
251 * }
252 * As it is now, we always sample the texture, then selectively replace
253 * the texel color results with the border color.
254 */
255
256 if (use_border) {
257 /* select texel color or border color depending on use_border. */
258 const struct util_format_description *format_desc = bld->format_desc;
259 struct lp_type border_type = bld->texel_type;
260 border_type.length = 4;
261 /*
262 * Only replace channels which are actually present. The others should
263 * get optimized away eventually by sampler_view swizzle anyway but it's
264 * easier too.
265 */
266 for (unsigned chan = 0; chan < 4; chan++) {
267 unsigned chan_s;
268 /* reverse-map channel... */
269 if (util_format_has_stencil(format_desc)) {
270 if (chan == 0)
271 chan_s = 0;
272 else
273 break;
274 } else {
275 for (chan_s = 0; chan_s < 4; chan_s++) {
276 if (chan_s == format_desc->swizzle[chan]) {
277 break;
278 }
279 }
280 }
281 if (chan_s <= 3) {
282 /* use the already clamped color */
283 LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
284 LLVMValueRef border_chan;
285
286 border_chan = lp_build_extract_broadcast(bld->gallivm,
287 border_type,
288 bld->texel_type,
289 bld->border_color_clamped,
290 idx);
291 texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
292 border_chan, texel_out[chan]);
293 }
294 }
295 }
296 }
297
298 static LLVMValueRef
get_first_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)299 get_first_level(struct gallivm_state *gallivm,
300 LLVMTypeRef resources_type,
301 LLVMValueRef resources_ptr,
302 unsigned texture_unit,
303 LLVMValueRef texture_unit_offset,
304 const struct lp_static_texture_state *static_state,
305 struct lp_sampler_dynamic_state *dynamic_state)
306 {
307 if (static_state->level_zero_only)
308 return lp_build_const_int32(gallivm, 0);
309 else {
310 LLVMValueRef first_level;
311
312 first_level = dynamic_state->first_level(gallivm, resources_type,
313 resources_ptr, texture_unit,
314 texture_unit_offset);
315 first_level = LLVMBuildZExt(gallivm->builder, first_level,
316 LLVMInt32TypeInContext(gallivm->context), "");
317 return first_level;
318 }
319 }
320
321
322 static LLVMValueRef
get_last_level(struct gallivm_state * gallivm,LLVMTypeRef resources_type,LLVMValueRef resources_ptr,unsigned texture_unit,LLVMValueRef texture_unit_offset,const struct lp_static_texture_state * static_state,struct lp_sampler_dynamic_state * dynamic_state)323 get_last_level(struct gallivm_state *gallivm,
324 LLVMTypeRef resources_type,
325 LLVMValueRef resources_ptr,
326 unsigned texture_unit,
327 LLVMValueRef texture_unit_offset,
328 const struct lp_static_texture_state *static_state,
329 struct lp_sampler_dynamic_state *dynamic_state)
330 {
331 if (static_state->level_zero_only)
332 return lp_build_const_int32(gallivm, 0);
333 else {
334 LLVMValueRef last_level;
335
336 last_level = dynamic_state->last_level(gallivm, resources_type,
337 resources_ptr, texture_unit,
338 texture_unit_offset);
339 last_level = LLVMBuildZExt(gallivm->builder, last_level,
340 LLVMInt32TypeInContext(gallivm->context), "");
341 return last_level;
342 }
343 }
344
345 /**
346 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR_REPEAT mode.
347 * (Note that with pot sizes could do this much more easily post-scale
348 * with some bit arithmetic.)
349 */
350 static LLVMValueRef
lp_build_coord_mirror(struct lp_build_sample_context * bld,LLVMValueRef coord,bool posOnly)351 lp_build_coord_mirror(struct lp_build_sample_context *bld,
352 LLVMValueRef coord, bool posOnly)
353 {
354 struct lp_build_context *coord_bld = &bld->coord_bld;
355 LLVMValueRef fract;
356 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
357
358 /*
359 * We can just use 2*(x - round(0.5*x)) to do all the mirroring,
360 * it all works out. (The result is in range [-1, 1.0], negative if
361 * the coord is in the "odd" section, otherwise positive.)
362 */
363
364 coord = lp_build_mul(coord_bld, coord, half);
365 fract = lp_build_round(coord_bld, coord);
366 fract = lp_build_sub(coord_bld, coord, fract);
367 coord = lp_build_add(coord_bld, fract, fract);
368
369 if (posOnly) {
370 /*
371 * Theoretically it's not quite 100% accurate because the spec says
372 * that ultimately a scaled coord of -x.0 should map to int coord
373 * -x + 1 with mirroring, not -x (this does not matter for bilinear
374 * filtering).
375 */
376 coord = lp_build_abs(coord_bld, coord);
377 /* kill off NaNs */
378 /* XXX: not safe without arch rounding, fract can be anything. */
379 coord = lp_build_max_ext(coord_bld, coord, coord_bld->zero,
380 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
381 }
382
383 return coord;
384 }
385
386
387 /**
388 * Helper to compute the first coord and the weight for
389 * linear wrap repeat npot textures
390 */
391 void
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context * bld,LLVMValueRef coord_f,LLVMValueRef length_i,LLVMValueRef length_f,LLVMValueRef * coord0_i,LLVMValueRef * weight_f)392 lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
393 LLVMValueRef coord_f,
394 LLVMValueRef length_i,
395 LLVMValueRef length_f,
396 LLVMValueRef *coord0_i,
397 LLVMValueRef *weight_f)
398 {
399 struct lp_build_context *coord_bld = &bld->coord_bld;
400 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
401 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
402 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
403 int_coord_bld->one);
404 LLVMValueRef mask;
405 /* wrap with normalized floats is just fract */
406 coord_f = lp_build_fract(coord_bld, coord_f);
407 /* mul by size and subtract 0.5 */
408 coord_f = lp_build_mul(coord_bld, coord_f, length_f);
409 coord_f = lp_build_sub(coord_bld, coord_f, half);
410 /*
411 * we avoided the 0.5/length division before the repeat wrap,
412 * now need to fix up edge cases with selects
413 */
414 /*
415 * Note we do a float (unordered) compare so we can eliminate NaNs.
416 * (Otherwise would need fract_safe above).
417 */
418 mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
419 PIPE_FUNC_LESS, coord_f, coord_bld->zero);
420
421 /* convert to int, compute lerp weight */
422 lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
423 *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
424 }
425
426
427 /**
428 * Build LLVM code for texture wrap mode for linear filtering.
429 * \param x0_out returns first integer texcoord
430 * \param x1_out returns second integer texcoord
431 * \param weight_out returns linear interpolation weight
432 */
433 static void
lp_build_sample_wrap_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef coord,LLVMValueRef length,LLVMValueRef length_f,LLVMValueRef offset,bool is_pot,unsigned wrap_mode,LLVMValueRef * x0_out,LLVMValueRef * x1_out,LLVMValueRef * weight_out)434 lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
435 bool is_gather,
436 LLVMValueRef coord,
437 LLVMValueRef length,
438 LLVMValueRef length_f,
439 LLVMValueRef offset,
440 bool is_pot,
441 unsigned wrap_mode,
442 LLVMValueRef *x0_out,
443 LLVMValueRef *x1_out,
444 LLVMValueRef *weight_out)
445 {
446 struct lp_build_context *coord_bld = &bld->coord_bld;
447 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
448 LLVMBuilderRef builder = bld->gallivm->builder;
449 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
450 LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
451 LLVMValueRef coord0, coord1, weight;
452
453 switch (wrap_mode) {
454 case PIPE_TEX_WRAP_REPEAT:
455 if (is_pot) {
456 /* mul by size and subtract 0.5 */
457 coord = lp_build_mul(coord_bld, coord, length_f);
458 coord = lp_build_sub(coord_bld, coord, half);
459 if (offset) {
460 offset = lp_build_int_to_float(coord_bld, offset);
461 coord = lp_build_add(coord_bld, coord, offset);
462 }
463 /* convert to int, compute lerp weight */
464 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
465 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
466 /* repeat wrap */
467 coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
468 coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
469 } else {
470 LLVMValueRef mask;
471 if (offset) {
472 offset = lp_build_int_to_float(coord_bld, offset);
473 offset = lp_build_div(coord_bld, offset, length_f);
474 coord = lp_build_add(coord_bld, coord, offset);
475 }
476 lp_build_coord_repeat_npot_linear(bld, coord,
477 length, length_f,
478 &coord0, &weight);
479 mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
480 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
481 coord1 = LLVMBuildAnd(builder,
482 lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
483 mask, "");
484 }
485 break;
486
487 case PIPE_TEX_WRAP_CLAMP:
488 if (bld->static_sampler_state->normalized_coords) {
489 /* scale coord to length */
490 coord = lp_build_mul(coord_bld, coord, length_f);
491 }
492 if (offset) {
493 offset = lp_build_int_to_float(coord_bld, offset);
494 coord = lp_build_add(coord_bld, coord, offset);
495 }
496
497 /*
498 * clamp to [0, length]
499 *
500 * Unlike some other wrap modes, this should be correct for gather
501 * too. GL_CLAMP explicitly does this clamp on the coord prior to
502 * actual wrapping (which is per sample).
503 */
504 coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
505
506 coord = lp_build_sub(coord_bld, coord, half);
507
508 /* convert to int, compute lerp weight */
509 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
510 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
511 break;
512
513 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
514 {
515 struct lp_build_context abs_coord_bld = bld->coord_bld;
516 abs_coord_bld.type.sign = false;
517
518 if (bld->static_sampler_state->normalized_coords) {
519 /* mul by tex size */
520 coord = lp_build_mul(coord_bld, coord, length_f);
521 }
522 if (offset) {
523 offset = lp_build_int_to_float(coord_bld, offset);
524 coord = lp_build_add(coord_bld, coord, offset);
525 }
526
527 /* clamp to length max */
528 coord = lp_build_min_ext(coord_bld, coord, length_f,
529 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
530 if (!is_gather) {
531 /* subtract 0.5 */
532 coord = lp_build_sub(coord_bld, coord, half);
533 /* clamp to [0, length - 0.5] */
534 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
535 /* convert to int, compute lerp weight */
536 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
537 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
538 } else {
539 /*
540 * The non-gather path will end up with coords 0, 1 if coord was
541 * smaller than 0.5 (with corresponding weight 0.0 so it doesn't
542 * really matter what the second coord is). But for gather, we
543 * really need to end up with coords 0, 0.
544 */
545 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
546 coord0 = lp_build_sub(coord_bld, coord, half);
547 coord1 = lp_build_add(coord_bld, coord, half);
548 /* Values range ([-0.5, length_f - 0.5], [0.5, length_f + 0.5] */
549 coord0 = lp_build_itrunc(coord_bld, coord0);
550 coord1 = lp_build_itrunc(coord_bld, coord1);
551 weight = coord_bld->undef;
552 }
553 /* coord1 = min(coord1, length-1) */
554 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
555 break;
556 }
557
558 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
559 if (bld->static_sampler_state->normalized_coords) {
560 /* scale coord to length */
561 coord = lp_build_mul(coord_bld, coord, length_f);
562 }
563 if (offset) {
564 offset = lp_build_int_to_float(coord_bld, offset);
565 coord = lp_build_add(coord_bld, coord, offset);
566 }
567 /*
568 * We don't need any clamp. Technically, for very large (pos or neg)
569 * (or infinite) values, clamp against [-length, length] would be
570 * correct, but we don't need to guarantee any specific
571 * result for such coords (the ifloor will be undefined, but for modes
572 * requiring border all resulting coords are safe).
573 */
574 coord = lp_build_sub(coord_bld, coord, half);
575 /* convert to int, compute lerp weight */
576 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
577 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
578 break;
579
580 case PIPE_TEX_WRAP_MIRROR_REPEAT:
581 if (offset) {
582 offset = lp_build_int_to_float(coord_bld, offset);
583 offset = lp_build_div(coord_bld, offset, length_f);
584 coord = lp_build_add(coord_bld, coord, offset);
585 }
586 if (!is_gather) {
587 /* compute mirror function */
588 coord = lp_build_coord_mirror(bld, coord, true);
589
590 /* scale coord to length */
591 coord = lp_build_mul(coord_bld, coord, length_f);
592 coord = lp_build_sub(coord_bld, coord, half);
593
594 /* convert to int, compute lerp weight */
595 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
596 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
597
598 /* coord0 = max(coord0, 0) */
599 coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
600 /* coord1 = min(coord1, length-1) */
601 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
602 } else {
603 /*
604 * This is pretty reasonable in the end, all what the tests care
605 * about is nasty edge cases (scaled coords x.5, so the individual
606 * coords are actually integers, which is REALLY tricky to get right
607 * due to this working differently both for negative numbers as well
608 * as for even/odd cases). But with enough magic it's not too complex
609 * after all.
610 * Maybe should try a bit arithmetic one though for POT textures...
611 */
612 LLVMValueRef isNeg;
613 /*
614 * Wrapping just once still works, even though it means we can
615 * get "wrong" sign due to performing mirror in the middle of the
616 * two coords (because this can only happen very near the odd/even
617 * edges, so both coords will actually end up as 0 or length - 1
618 * in the end).
619 * For GL4 gather with per-sample offsets we'd need to the mirroring
620 * per coord too.
621 */
622 coord = lp_build_coord_mirror(bld, coord, false);
623 coord = lp_build_mul(coord_bld, coord, length_f);
624
625 /*
626 * NaNs should be safe here, we'll do away with them with
627 * the ones' complement plus min.
628 */
629 coord0 = lp_build_sub(coord_bld, coord, half);
630 coord0 = lp_build_ifloor(coord_bld, coord0);
631 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
632 /* ones complement for neg numbers (mirror(negX) = X - 1) */
633 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
634 coord0, int_coord_bld->zero);
635 coord0 = lp_build_xor(int_coord_bld, coord0, isNeg);
636 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS,
637 coord1, int_coord_bld->zero);
638 coord1 = lp_build_xor(int_coord_bld, coord1, isNeg);
639 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
640 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
641
642 weight = coord_bld->undef;
643 }
644 break;
645
646 case PIPE_TEX_WRAP_MIRROR_CLAMP:
647 if (bld->static_sampler_state->normalized_coords) {
648 /* scale coord to length */
649 coord = lp_build_mul(coord_bld, coord, length_f);
650 }
651 if (offset) {
652 offset = lp_build_int_to_float(coord_bld, offset);
653 coord = lp_build_add(coord_bld, coord, offset);
654 }
655 /*
656 * XXX: probably not correct for gather, albeit I'm not
657 * entirely sure as it's poorly specified. The wrapping looks
658 * correct according to the spec which is against gl 1.2.1,
659 * however negative values will be swapped - gl re-specified
660 * wrapping with newer versions (no more pre-clamp except with
661 * GL_CLAMP).
662 */
663 coord = lp_build_abs(coord_bld, coord);
664
665 /* clamp to [0, length] */
666 coord = lp_build_min_ext(coord_bld, coord, length_f,
667 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
668
669 coord = lp_build_sub(coord_bld, coord, half);
670
671 /* convert to int, compute lerp weight */
672 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
673 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
674 break;
675
676 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
677 {
678 struct lp_build_context abs_coord_bld = bld->coord_bld;
679 abs_coord_bld.type.sign = false;
680
681 if (bld->static_sampler_state->normalized_coords) {
682 /* scale coord to length */
683 coord = lp_build_mul(coord_bld, coord, length_f);
684 }
685 if (offset) {
686 offset = lp_build_int_to_float(coord_bld, offset);
687 coord = lp_build_add(coord_bld, coord, offset);
688 }
689 if (!is_gather) {
690 coord = lp_build_abs(coord_bld, coord);
691
692 /* clamp to length max */
693 coord = lp_build_min_ext(coord_bld, coord, length_f,
694 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
695 /* subtract 0.5 */
696 coord = lp_build_sub(coord_bld, coord, half);
697 /* clamp to [0, length - 0.5] */
698 coord = lp_build_max(coord_bld, coord, coord_bld->zero);
699
700 /* convert to int, compute lerp weight */
701 lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
702 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
703 /* coord1 = min(coord1, length-1) */
704 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
705 } else {
706 /*
707 * The non-gather path will swap coord0/1 if coord was negative,
708 * which is ok for filtering since the filter weight matches
709 * accordingly. Also, if coord is close to zero, coord0/1 will
710 * be 0 and 1, instead of 0 and 0 (again ok due to filter
711 * weight being 0.0). Both issues need to be fixed for gather.
712 */
713 LLVMValueRef isNeg;
714
715 /*
716 * Actually wanted to cheat here and use:
717 * coord1 = lp_build_iround(coord_bld, coord);
718 * but it's not good enough for some tests (even piglit
719 * textureGather is set up in a way so the coords area always
720 * .5, that is right at the crossover points).
721 * So do ordinary sub/floor, then do ones' complement
722 * for negative numbers.
723 * (Note can't just do sub|add/abs/itrunc per coord neither -
724 * because the spec demands that mirror(3.0) = 3 but
725 * mirror(-3.0) = 2.)
726 */
727 coord = lp_build_sub(coord_bld, coord, half);
728 coord0 = lp_build_ifloor(coord_bld, coord);
729 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
730 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord0,
731 int_coord_bld->zero);
732 coord0 = lp_build_xor(int_coord_bld, isNeg, coord0);
733 coord0 = lp_build_min(int_coord_bld, coord0, length_minus_one);
734
735 isNeg = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, coord1,
736 int_coord_bld->zero);
737 coord1 = lp_build_xor(int_coord_bld, isNeg, coord1);
738 coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
739
740 weight = coord_bld->undef;
741 }
742 }
743 break;
744
745 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
746 {
747 if (bld->static_sampler_state->normalized_coords) {
748 /* scale coord to length */
749 coord = lp_build_mul(coord_bld, coord, length_f);
750 }
751 if (offset) {
752 offset = lp_build_int_to_float(coord_bld, offset);
753 coord = lp_build_add(coord_bld, coord, offset);
754 }
755 /*
756 * XXX: probably not correct for gather due to swapped
757 * order if coord is negative (same rationale as for
758 * MIRROR_CLAMP).
759 */
760 coord = lp_build_abs(coord_bld, coord);
761
762 /*
763 * We don't need any clamp. Technically, for very large
764 * (or infinite) values, clamp against length would be
765 * correct, but we don't need to guarantee any specific
766 * result for such coords (the ifloor will be undefined, but
767 * for modes requiring border all resulting coords are safe).
768 */
769 coord = lp_build_sub(coord_bld, coord, half);
770
771 /* convert to int, compute lerp weight */
772 lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
773 coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
774 }
775 break;
776
777 default:
778 assert(0);
779 coord0 = NULL;
780 coord1 = NULL;
781 weight = NULL;
782 }
783
784 *x0_out = coord0;
785 *x1_out = coord1;
786 *weight_out = weight;
787 }
788
789
790 /**
791 * Build LLVM code for texture wrap mode for nearest filtering.
792 * \param coord the incoming texcoord (nominally in [0,1])
793 * \param length the texture size along one dimension, as int vector
794 * \param length_f the texture size along one dimension, as float vector
795 * \param offset texel offset along one dimension (as int vector)
796 * \param is_pot if TRUE, length is a power of two
797 * \param wrap_mode one of PIPE_TEX_WRAP_x
798 */
static LLVMValueRef
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
                             LLVMValueRef coord,
                             LLVMValueRef length,
                             LLVMValueRef length_f,
                             LLVMValueRef offset,
                             bool is_pot,
                             unsigned wrap_mode)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
   LLVMValueRef icoord;

   switch (wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* unnormalize, floor, then wrap with a cheap bitwise AND
          * (valid since length is a power of two).
          */
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_ifloor(coord_bld, coord);
         if (offset) {
            icoord = lp_build_add(int_coord_bld, icoord, offset);
         }
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
      } else {
         /* non-pot: fold the texel offset into the normalized coord
          * (offset / length), wrap into [0,1) via fract, then unnormalize.
          */
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length_f);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* take fraction, unnormalize */
         coord = lp_build_fract_safe(coord_bld, coord);
         coord = lp_build_mul(coord_bld, coord, length_f);
         icoord = lp_build_itrunc(coord_bld, coord);
      }
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }

      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* floor */
      /* use itrunc instead since we clamp to 0 anyway */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1]. */
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
                              length_minus_one);
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      /* no clamp necessary, border masking will handle this */
      icoord = lp_build_ifloor(coord_bld, coord);
      if (offset) {
         icoord = lp_build_add(int_coord_bld, icoord, offset);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_REPEAT:
      /* offset is applied in normalized space, before mirroring */
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         offset = lp_build_div(coord_bld, offset, length_f);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* compute mirror function */
      coord = lp_build_coord_mirror(bld, coord, true);

      /* scale coord to length */
      assert(bld->static_sampler_state->normalized_coords);
      coord = lp_build_mul(coord_bld, coord, length_f);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);

      /* clamp to [0, length - 1] */
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror about zero via abs, then clamp the upper end */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      /*
       * Use unsigned min due to possible undef values (NaNs, overflow)
       */
      {
         struct lp_build_context abs_coord_bld = *int_coord_bld;
         abs_coord_bld.type.sign = false;
         /* clamp to [0, length - 1] */
         icoord = lp_build_min(&abs_coord_bld, icoord, length_minus_one);
      }
      break;

   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      if (bld->static_sampler_state->normalized_coords) {
         /* scale coord to length */
         coord = lp_build_mul(coord_bld, coord, length_f);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* mirror about zero; out-of-range values are left for border masking */
      coord = lp_build_abs(coord_bld, coord);

      /* itrunc == ifloor here */
      icoord = lp_build_itrunc(coord_bld, coord);
      break;

   default:
      assert(0);
      icoord = NULL;
   }

   return icoord;
}
935
936
/**
 * Do shadow test/comparison.
 * \param p  shadow ref value
 * \param texel  the texel to compare against
 * \return a boolean mask vector (0 / ~0 per lane) from lp_build_cmp,
 *         to be turned into 0.0/1.0 by the caller via select/and.
 */
static LLVMValueRef
lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
                            LLVMValueRef p,
                            LLVMValueRef texel)
{
   struct lp_build_context *texel_bld = &bld->texel_bld;
   LLVMValueRef res;

   /* debug aid, normally compiled out */
   if (0) {
      //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
      lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
   }

   /* result = (p FUNC texel) ? 1 : 0 */
   /*
    * honor d3d10 floating point rules here, which state that comparisons
    * are ordered except NOT_EQUAL which is unordered.
    */
   if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
      res = lp_build_cmp_ordered(texel_bld,
                                 bld->static_sampler_state->compare_func,
                                 p, texel);
   } else {
      res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
                         p, texel);
   }
   return res;
}
970
971
/**
 * Generate code to sample a mipmap level with nearest filtering.
 * If sampling a cube texture, r = cube face in [0,5].
 *
 * Wraps each texcoord per the sampler's per-axis wrap mode, fetches the
 * texels and, if a shadow compare mode is active, replaces the colors
 * with the 0.0/1.0 comparison result (broadcast to all four channels).
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef ilevel,
                              const LLVMValueRef *coords,
                              const LLVMValueRef *offsets,
                              LLVMValueRef colors_out[4])
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec;
   LLVMValueRef height_vec;
   LLVMValueRef depth_vec;
   LLVMValueRef flt_size;
   LLVMValueRef flt_width_vec;
   LLVMValueRef flt_height_vec;
   LLVMValueRef flt_depth_vec;
   LLVMValueRef x, y = NULL, z = NULL;

   /* per-axis sizes as int vectors (for wrapping/clamping) ... */
   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                size,
                                &width_vec, &height_vec, &depth_vec);

   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);

   /* ... and as float vectors (for unnormalizing the coords) */
   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);

   /*
    * Compute integer texcoords.
    */
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
                                    flt_width_vec, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s);
   lp_build_name(x, "tex.x.wrapped");

   if (dims >= 2) {
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
                                       flt_height_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t);
      lp_build_name(y, "tex.y.wrapped");

      if (dims == 3) {
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
                                          flt_depth_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r);
         lp_build_name(z, "tex.z.wrapped");
      }
   }
   /* array textures: z is the (integer) layer index, not a wrapped coord */
   if (has_layer_coord(bld->static_texture_state->target)) {
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* add cube layer to face */
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
      } else {
         z = coords[2];
      }
      lp_build_name(z, "tex.z.layer");
   }

   /*
    * Get texture colors.
    */
   lp_build_sample_texel_soa(bld,
                             width_vec, height_vec, depth_vec,
                             x, y, z,
                             row_stride_vec, img_stride_vec,
                             data_ptr, mipoffsets, ilevel, colors_out);

   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
      /* shadow sampling: coords[4] holds the reference value */
      LLVMValueRef cmpval;
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
      /* this is really just a AND 1.0, cmpval but llvm is clever enough */
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
                                      bld->texel_bld.one, bld->texel_bld.zero);
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
   }

}
1065
1066
/**
 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
 *
 * Computes weight*mask1 + (1-weight)*mask0 without multiplies: since
 * mask0/mask1 are all-zeros or all-ones bit patterns, bitcasting the
 * float weights to int and ANDing with the masks either keeps the
 * weight's bits intact or zeroes it (0.0f), then the two terms are added.
 */
static LLVMValueRef
lp_build_masklerp(struct lp_build_context *bld,
                  LLVMValueRef weight,
                  LLVMValueRef mask0,
                  LLVMValueRef mask1)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef weight2;

   /* complementary weight for mask0 */
   weight2 = lp_build_sub(bld, bld->one, weight);
   weight = LLVMBuildBitCast(builder, weight,
                             lp_build_int_vec_type(gallivm, bld->type), "");
   weight2 = LLVMBuildBitCast(builder, weight2,
                              lp_build_int_vec_type(gallivm, bld->type), "");
   /* AND selects the weight where the mask is set, 0.0 where it isn't */
   weight = LLVMBuildAnd(builder, weight, mask1, "");
   weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
   weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
   weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
   return lp_build_add(bld, weight, weight2);
}
1091
/**
 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
 *
 * First mask-lerps along the first axis (weight0) for each of the two
 * rows, then does an ordinary lerp between the rows with weight1.
 */
static LLVMValueRef
lp_build_masklerp2d(struct lp_build_context *bld,
                    LLVMValueRef weight0,
                    LLVMValueRef weight1,
                    LLVMValueRef mask00,
                    LLVMValueRef mask01,
                    LLVMValueRef mask10,
                    LLVMValueRef mask11)
{
   LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
   LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
   return lp_build_lerp(bld, weight1, val0, val1, 0);
}
1108
1109 /*
1110 * this is a bit excessive code for something OpenGL just recommends
1111 * but does not require.
1112 */
1113 #define ACCURATE_CUBE_CORNERS 1
1114
1115 /**
1116 * Generate code to sample a mipmap level with linear filtering.
1117 * If sampling a cube texture, r = cube face in [0,5].
1118 * If linear_mask is present, only pixels having their mask set
1119 * will receive linear filtering, the rest will use nearest.
1120 */
1121 static void
lp_build_sample_image_linear(struct lp_build_sample_context * bld,bool is_gather,LLVMValueRef size,LLVMValueRef linear_mask,LLVMValueRef row_stride_vec,LLVMValueRef img_stride_vec,LLVMValueRef data_ptr,LLVMValueRef mipoffsets,LLVMValueRef ilevel,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef colors_out[4])1122 lp_build_sample_image_linear(struct lp_build_sample_context *bld,
1123 bool is_gather,
1124 LLVMValueRef size,
1125 LLVMValueRef linear_mask,
1126 LLVMValueRef row_stride_vec,
1127 LLVMValueRef img_stride_vec,
1128 LLVMValueRef data_ptr,
1129 LLVMValueRef mipoffsets,
1130 LLVMValueRef ilevel,
1131 const LLVMValueRef *coords,
1132 const LLVMValueRef *offsets,
1133 LLVMValueRef colors_out[4])
1134 {
1135 LLVMBuilderRef builder = bld->gallivm->builder;
1136 struct lp_build_context *ivec_bld = &bld->int_coord_bld;
1137 struct lp_build_context *coord_bld = &bld->coord_bld;
1138 struct lp_build_context *texel_bld = &bld->texel_bld;
1139 const unsigned dims = bld->dims;
1140 LLVMValueRef width_vec;
1141 LLVMValueRef height_vec;
1142 LLVMValueRef depth_vec;
1143 LLVMValueRef flt_size;
1144 LLVMValueRef flt_width_vec;
1145 LLVMValueRef flt_height_vec;
1146 LLVMValueRef flt_depth_vec;
1147 LLVMValueRef fall_off[4] = { 0 }, have_corners = NULL;
1148 LLVMValueRef z1 = NULL;
1149 LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
1150 LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
1151 LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
1152 LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
1153 LLVMValueRef xs[4], ys[4], zs[4];
1154 LLVMValueRef neighbors[2][2][4];
1155 bool seamless_cube_filter, accurate_cube_corners;
1156 unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1157
1158 if (is_gather) {
1159 switch (bld->gather_comp) {
1160 case 0: chan_swiz = bld->static_texture_state->swizzle_r; break;
1161 case 1: chan_swiz = bld->static_texture_state->swizzle_g; break;
1162 case 2: chan_swiz = bld->static_texture_state->swizzle_b; break;
1163 case 3: chan_swiz = bld->static_texture_state->swizzle_a; break;
1164 default:
1165 break;
1166 }
1167 }
1168
1169 seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
1170 bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
1171 bld->static_sampler_state->seamless_cube_map;
1172
1173 /*
1174 * Disable accurate cube corners for integer textures, which should only
1175 * get here in the gather path.
1176 */
1177 accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
1178 !util_format_is_pure_integer(bld->static_texture_state->format);
1179
1180 lp_build_extract_image_sizes(bld,
1181 &bld->int_size_bld,
1182 bld->int_coord_type,
1183 size,
1184 &width_vec, &height_vec, &depth_vec);
1185
1186 flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
1187
1188 lp_build_extract_image_sizes(bld,
1189 &bld->float_size_bld,
1190 bld->coord_type,
1191 flt_size,
1192 &flt_width_vec, &flt_height_vec, &flt_depth_vec);
1193
1194 LLVMTypeRef int1t = LLVMInt1TypeInContext(bld->gallivm->context);
1195
1196 /*
1197 * Compute integer texcoords.
1198 */
1199
1200 if (!seamless_cube_filter) {
1201 lp_build_sample_wrap_linear(bld, is_gather, coords[0], width_vec,
1202 flt_width_vec, offsets[0],
1203 bld->static_texture_state->pot_width,
1204 bld->static_sampler_state->wrap_s,
1205 &x00, &x01, &s_fpart);
1206 lp_build_name(x00, "tex.x0.wrapped");
1207 lp_build_name(x01, "tex.x1.wrapped");
1208 x10 = x00;
1209 x11 = x01;
1210
1211 if (dims >= 2) {
1212 lp_build_sample_wrap_linear(bld, is_gather, coords[1], height_vec,
1213 flt_height_vec, offsets[1],
1214 bld->static_texture_state->pot_height,
1215 bld->static_sampler_state->wrap_t,
1216 &y00, &y10, &t_fpart);
1217 lp_build_name(y00, "tex.y0.wrapped");
1218 lp_build_name(y10, "tex.y1.wrapped");
1219 y01 = y00;
1220 y11 = y10;
1221
1222 if (dims == 3) {
1223 lp_build_sample_wrap_linear(bld, is_gather, coords[2], depth_vec,
1224 flt_depth_vec, offsets[2],
1225 bld->static_texture_state->pot_depth,
1226 bld->static_sampler_state->wrap_r,
1227 &z00, &z1, &r_fpart);
1228 z01 = z10 = z11 = z00;
1229 lp_build_name(z00, "tex.z0.wrapped");
1230 lp_build_name(z1, "tex.z1.wrapped");
1231 }
1232 }
1233 if (has_layer_coord(bld->static_texture_state->target)) {
1234 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1235 /* add cube layer to face */
1236 z00 = z01 = z10 = z11 = z1 =
1237 lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
1238 } else {
1239 z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */
1240 }
1241 lp_build_name(z00, "tex.z0.layer");
1242 lp_build_name(z1, "tex.z1.layer");
1243 }
1244 } else {
1245 struct lp_build_if_state edge_if;
1246 LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
1247 LLVMValueRef coord0, coord1, have_edge, have_corner;
1248 LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
1249 LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
1250 LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
1251 LLVMValueRef face = coords[2];
1252 LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
1253 LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
1254 /* XXX drop height calcs. Could (should) do this without seamless filtering too */
1255 height_vec = width_vec;
1256 flt_height_vec = flt_width_vec;
1257
1258 /* XXX the overflow logic is actually sort of duplicated with trilinear,
1259 * since an overflow in one mip should also have a corresponding overflow
1260 * in another.
1261 */
1262 /* should always have normalized coords, and offsets are undefined */
1263 assert(bld->static_sampler_state->normalized_coords);
1264 /*
1265 * The coords should all be between [0,1] however we can have NaNs,
1266 * which will wreak havoc. In particular the y1_clamped value below
1267 * can be -INT_MAX (on x86) and be propagated right through (probably
1268 * other values might be bogus in the end too).
1269 * So kill off the NaNs here.
1270 */
1271 coord0 = lp_build_max_ext(coord_bld, coords[0], coord_bld->zero,
1272 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1273 coord0 = lp_build_mul(coord_bld, coord0, flt_width_vec);
1274 /* instead of clamp, build mask if overflowed */
1275 coord0 = lp_build_sub(coord_bld, coord0, half);
1276 /* convert to int, compute lerp weight */
1277 /* not ideal with AVX (and no AVX2) */
1278 lp_build_ifloor_fract(coord_bld, coord0, &x0, &s_fpart);
1279 x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
1280 coord1 = lp_build_max_ext(coord_bld, coords[1], coord_bld->zero,
1281 GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN);
1282 coord1 = lp_build_mul(coord_bld, coord1, flt_height_vec);
1283 coord1 = lp_build_sub(coord_bld, coord1, half);
1284 lp_build_ifloor_fract(coord_bld, coord1, &y0, &t_fpart);
1285 y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
1286
1287 fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
1288 fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
1289 fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
1290 fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
1291
1292 fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
1293 fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
1294 have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
1295 have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
1296
1297 /* needed for accurate corner filtering branch later, rely on 0 init */
1298 have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
1299
1300 for (unsigned texel_index = 0; texel_index < 4; texel_index++) {
1301 xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
1302 ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
1303 zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
1304 }
1305
1306 lp_build_if(&edge_if, bld->gallivm, have_edge);
1307
1308 have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1309 have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1310 LLVMBuildStore(builder, have_corner, have_corners);
1311
1312 /*
1313 * Need to feed clamped values here for cheap corner handling,
1314 * but only for y coord (as when falling off both edges we only
1315 * fall off the x one) - this should be sufficient.
1316 */
1317 y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1318 y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1319
1320 /*
1321 * Get all possible new coords.
1322 */
1323 lp_build_cube_new_coords(ivec_bld, face,
1324 x0, x1, y0_clamped, y1_clamped,
1325 length_minus_one,
1326 new_faces, new_xcoords, new_ycoords);
1327
1328 /* handle fall off x-, x+ direction */
1329 /* determine new coords, face (not both fall_off vars can be true at same time) */
1330 x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1331 y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1332 x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1333 y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1334 x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1335 y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1336 x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1337 y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1338
1339 z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1340 z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1341
1342 /* handle fall off y-, y+ direction */
1343 /*
1344 * Cheap corner logic: just hack up things so a texel doesn't fall
1345 * off both sides (which means filter weights will be wrong but we'll only
1346 * use valid texels in the filter).
1347 * This means however (y) coords must additionally be clamped (see above).
1348 * This corner handling should be fully OpenGL (but not d3d10) compliant.
1349 */
1350 fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1351 fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1352 fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1353 fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1354
1355 x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1356 y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1357 x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1358 y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1359 x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1360 y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1361 x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1362 y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1363
1364 z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1365 z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1366 z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1367 z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1368
1369 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1370 /* now can add cube layer to face (per sample) */
1371 z00 = lp_build_add(ivec_bld, z00, coords[3]);
1372 z01 = lp_build_add(ivec_bld, z01, coords[3]);
1373 z10 = lp_build_add(ivec_bld, z10, coords[3]);
1374 z11 = lp_build_add(ivec_bld, z11, coords[3]);
1375 }
1376
1377 LLVMBuildStore(builder, x00, xs[0]);
1378 LLVMBuildStore(builder, x01, xs[1]);
1379 LLVMBuildStore(builder, x10, xs[2]);
1380 LLVMBuildStore(builder, x11, xs[3]);
1381 LLVMBuildStore(builder, y00, ys[0]);
1382 LLVMBuildStore(builder, y01, ys[1]);
1383 LLVMBuildStore(builder, y10, ys[2]);
1384 LLVMBuildStore(builder, y11, ys[3]);
1385 LLVMBuildStore(builder, z00, zs[0]);
1386 LLVMBuildStore(builder, z01, zs[1]);
1387 LLVMBuildStore(builder, z10, zs[2]);
1388 LLVMBuildStore(builder, z11, zs[3]);
1389
1390 lp_build_else(&edge_if);
1391
1392 LLVMBuildStore(builder, x0, xs[0]);
1393 LLVMBuildStore(builder, x1, xs[1]);
1394 LLVMBuildStore(builder, x0, xs[2]);
1395 LLVMBuildStore(builder, x1, xs[3]);
1396 LLVMBuildStore(builder, y0, ys[0]);
1397 LLVMBuildStore(builder, y0, ys[1]);
1398 LLVMBuildStore(builder, y1, ys[2]);
1399 LLVMBuildStore(builder, y1, ys[3]);
1400 if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1401 LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1402 LLVMBuildStore(builder, cube_layer, zs[0]);
1403 LLVMBuildStore(builder, cube_layer, zs[1]);
1404 LLVMBuildStore(builder, cube_layer, zs[2]);
1405 LLVMBuildStore(builder, cube_layer, zs[3]);
1406 } else {
1407 LLVMBuildStore(builder, face, zs[0]);
1408 LLVMBuildStore(builder, face, zs[1]);
1409 LLVMBuildStore(builder, face, zs[2]);
1410 LLVMBuildStore(builder, face, zs[3]);
1411 }
1412
1413 lp_build_endif(&edge_if);
1414
1415 LLVMTypeRef type = ivec_bld->vec_type;
1416 x00 = LLVMBuildLoad2(builder, type, xs[0], "");
1417 x01 = LLVMBuildLoad2(builder, type, xs[1], "");
1418 x10 = LLVMBuildLoad2(builder, type, xs[2], "");
1419 x11 = LLVMBuildLoad2(builder, type, xs[3], "");
1420 y00 = LLVMBuildLoad2(builder, type, ys[0], "");
1421 y01 = LLVMBuildLoad2(builder, type, ys[1], "");
1422 y10 = LLVMBuildLoad2(builder, type, ys[2], "");
1423 y11 = LLVMBuildLoad2(builder, type, ys[3], "");
1424 z00 = LLVMBuildLoad2(builder, type, zs[0], "");
1425 z01 = LLVMBuildLoad2(builder, type, zs[1], "");
1426 z10 = LLVMBuildLoad2(builder, type, zs[2], "");
1427 z11 = LLVMBuildLoad2(builder, type, zs[3], "");
1428 }
1429
1430 if (linear_mask) {
1431 /*
1432 * Whack filter weights into place. Whatever texel had more weight is
1433 * the one which should have been selected by nearest filtering hence
1434 * just use 100% weight for it.
1435 */
1436 struct lp_build_context *c_bld = &bld->coord_bld;
1437 LLVMValueRef w1_mask, w1_weight;
1438 LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1439
1440 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1441 /* this select is really just a "and" */
1442 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1443 s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1444 if (dims >= 2) {
1445 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1446 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1447 t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1448 if (dims == 3) {
1449 w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1450 w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1451 r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1452 }
1453 }
1454 }
1455
1456 /*
1457 * Get texture colors.
1458 */
1459 /* get x0/x1 texels */
1460 lp_build_sample_texel_soa(bld,
1461 width_vec, height_vec, depth_vec,
1462 x00, y00, z00,
1463 row_stride_vec, img_stride_vec,
1464 data_ptr, mipoffsets, ilevel, neighbors[0][0]);
1465 lp_build_sample_texel_soa(bld,
1466 width_vec, height_vec, depth_vec,
1467 x01, y01, z01,
1468 row_stride_vec, img_stride_vec,
1469 data_ptr, mipoffsets, ilevel, neighbors[0][1]);
1470
1471 if (dims == 1) {
1472 assert(!is_gather);
1473 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1474 lp_build_reduce_filter(texel_bld,
1475 bld->static_sampler_state->reduction_mode,
1476 0,
1477 4,
1478 s_fpart,
1479 neighbors[0][0],
1480 neighbors[0][1],
1481 colors_out);
1482 } else {
1483 LLVMValueRef cmpval0, cmpval1;
1484 cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1485 cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1486 /* simplified lerp, AND mask with weight and add */
1487 colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
1488 cmpval0, cmpval1);
1489 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1490 }
1491 } else {
1492 /* 2D/3D texture */
1493 struct lp_build_if_state corner_if;
1494 LLVMValueRef colors0[4], colorss[4] = { 0 };
1495
1496 /* get x0/x1 texels at y1 */
1497 lp_build_sample_texel_soa(bld,
1498 width_vec, height_vec, depth_vec,
1499 x10, y10, z10,
1500 row_stride_vec, img_stride_vec,
1501 data_ptr, mipoffsets, ilevel, neighbors[1][0]);
1502 lp_build_sample_texel_soa(bld,
1503 width_vec, height_vec, depth_vec,
1504 x11, y11, z11,
1505 row_stride_vec, img_stride_vec,
1506 data_ptr, mipoffsets, ilevel, neighbors[1][1]);
1507
1508 /*
1509 * To avoid having to duplicate linear_mask / fetch code use
1510 * another branch (with corner condition though edge would work
1511 * as well) here.
1512 */
1513 if (have_corners && accurate_cube_corners &&
1514 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1515 LLVMValueRef c00, c01, c10, c11, c00f, c01f, c10f, c11f;
1516 LLVMValueRef have_corner, one_third;
1517
1518 colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs0");
1519 colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs1");
1520 colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs2");
1521 colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs3");
1522
1523 have_corner = LLVMBuildLoad2(builder, int1t, have_corners, "");
1524
1525 lp_build_if(&corner_if, bld->gallivm, have_corner);
1526
1527 one_third = lp_build_const_vec(bld->gallivm, coord_bld->type,
1528 1.0f/3.0f);
1529
1530 /* find corner */
1531 c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1532 c00f = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1533 c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1534 c01f = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1535 c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1536 c10f = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1537 c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1538 c11f = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1539
1540 if (!is_gather) {
1541 /*
1542 * we can't use standard 2d lerp as we need per-element weight
1543 * in case of corners, so just calculate bilinear result as
1544 * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1545 * (This is actually less work than using 2d lerp, 7 vs. 9
1546 * instructions, however calculating the weights needs another 6,
1547 * so actually probably not slower than 2d lerp only for 4 channels
1548 * as weights only need to be calculated once - of course fixing
1549 * the weights has additional cost.)
1550 */
1551 LLVMValueRef w00, w01, w10, w11, wx0, wy0, c_weight, tmp;
1552 wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1553 wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1554 w00 = lp_build_mul(coord_bld, wx0, wy0);
1555 w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1556 w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1557 w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1558
1559 /* find corner weight */
1560 c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1561 c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1562 c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1563 c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1564
1565 /*
1566 * add 1/3 of the corner weight to the weight of the 3 other
1567 * samples and null out corner weight.
1568 */
1569 c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1570 w00 = lp_build_add(coord_bld, w00, c_weight);
1571 w00 = lp_build_andnot(coord_bld, w00, c00f);
1572 w01 = lp_build_add(coord_bld, w01, c_weight);
1573 w01 = lp_build_andnot(coord_bld, w01, c01f);
1574 w10 = lp_build_add(coord_bld, w10, c_weight);
1575 w10 = lp_build_andnot(coord_bld, w10, c10f);
1576 w11 = lp_build_add(coord_bld, w11, c_weight);
1577 w11 = lp_build_andnot(coord_bld, w11, c11f);
1578
1579 if (bld->static_sampler_state->compare_mode ==
1580 PIPE_TEX_COMPARE_NONE) {
1581 for (unsigned chan = 0; chan < 4; chan++) {
1582 colors0[chan] = lp_build_mul(coord_bld, w00,
1583 neighbors[0][0][chan]);
1584 tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1585 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1586 tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1587 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1588 tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1589 colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1590 }
1591 } else {
1592 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1593 cmpval00 = lp_build_sample_comparefunc(bld, coords[4],
1594 neighbors[0][0][0]);
1595 cmpval01 = lp_build_sample_comparefunc(bld, coords[4],
1596 neighbors[0][1][0]);
1597 cmpval10 = lp_build_sample_comparefunc(bld, coords[4],
1598 neighbors[1][0][0]);
1599 cmpval11 = lp_build_sample_comparefunc(bld, coords[4],
1600 neighbors[1][1][0]);
1601 /*
1602 * inputs to interpolation are just masks so just add
1603 * masked weights together
1604 */
1605 cmpval00 = LLVMBuildBitCast(builder, cmpval00,
1606 coord_bld->vec_type, "");
1607 cmpval01 = LLVMBuildBitCast(builder, cmpval01,
1608 coord_bld->vec_type, "");
1609 cmpval10 = LLVMBuildBitCast(builder, cmpval10,
1610 coord_bld->vec_type, "");
1611 cmpval11 = LLVMBuildBitCast(builder, cmpval11,
1612 coord_bld->vec_type, "");
1613 colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1614 tmp = lp_build_and(coord_bld, w01, cmpval01);
1615 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1616 tmp = lp_build_and(coord_bld, w10, cmpval10);
1617 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1618 tmp = lp_build_and(coord_bld, w11, cmpval11);
1619 colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1620 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1621 }
1622 } else {
1623 /*
1624 * We don't have any weights to adjust, so instead calculate
1625 * the fourth texel as simply the average of the other 3.
1626 * (This would work for non-gather too, however we'd have
1627 * a boatload more of the select stuff due to there being
1628 * 4 times as many colors as weights.)
1629 */
1630 LLVMValueRef col00, col01, col10, col11;
1631 LLVMValueRef colc, colc0, colc1;
1632 col10 = lp_build_swizzle_soa_channel(texel_bld,
1633 neighbors[1][0], chan_swiz);
1634 col11 = lp_build_swizzle_soa_channel(texel_bld,
1635 neighbors[1][1], chan_swiz);
1636 col01 = lp_build_swizzle_soa_channel(texel_bld,
1637 neighbors[0][1], chan_swiz);
1638 col00 = lp_build_swizzle_soa_channel(texel_bld,
1639 neighbors[0][0], chan_swiz);
1640
1641 /*
1642 * The spec says for comparison filtering, the comparison
1643 * must happen before synthesizing the new value.
1644 * This means all gathered values are always 0 or 1,
1645 * except for the non-existing texel, which can be 0,1/3,2/3,1...
1646 * Seems like we'd be allowed to just return 0 or 1 too, so we
1647 * could simplify and pass down the compare mask values to the
1648 * end (using int arithmetic/compare on the mask values to
1649 * construct the fourth texel) and only there convert to floats
1650 * but it's probably not worth it (it might be easier for the cpu
1651 * but not for the code)...
1652 */
1653 if (bld->static_sampler_state->compare_mode !=
1654 PIPE_TEX_COMPARE_NONE) {
1655 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1656 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], col00);
1657 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], col01);
1658 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], col10);
1659 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], col11);
1660 col00 = lp_build_select(texel_bld, cmpval00,
1661 texel_bld->one, texel_bld->zero);
1662 col01 = lp_build_select(texel_bld, cmpval01,
1663 texel_bld->one, texel_bld->zero);
1664 col10 = lp_build_select(texel_bld, cmpval10,
1665 texel_bld->one, texel_bld->zero);
1666 col11 = lp_build_select(texel_bld, cmpval11,
1667 texel_bld->one, texel_bld->zero);
1668 }
1669
1670 /*
1671 * Null out corner color.
1672 */
1673 col00 = lp_build_andnot(coord_bld, col00, c00f);
1674 col01 = lp_build_andnot(coord_bld, col01, c01f);
1675 col10 = lp_build_andnot(coord_bld, col10, c10f);
1676 col11 = lp_build_andnot(coord_bld, col11, c11f);
1677
1678 /*
1679 * New corner texel color is all colors added / 3.
1680 */
1681 colc0 = lp_build_add(coord_bld, col00, col01);
1682 colc1 = lp_build_add(coord_bld, col10, col11);
1683 colc = lp_build_add(coord_bld, colc0, colc1);
1684 colc = lp_build_mul(coord_bld, one_third, colc);
1685
1686 /*
1687 * Replace the corner texel color with the new value.
1688 */
1689 col00 = lp_build_select(coord_bld, c00, colc, col00);
1690 col01 = lp_build_select(coord_bld, c01, colc, col01);
1691 col10 = lp_build_select(coord_bld, c10, colc, col10);
1692 col11 = lp_build_select(coord_bld, c11, colc, col11);
1693
1694 colors0[0] = col10;
1695 colors0[1] = col11;
1696 colors0[2] = col01;
1697 colors0[3] = col00;
1698 }
1699
1700 LLVMBuildStore(builder, colors0[0], colorss[0]);
1701 LLVMBuildStore(builder, colors0[1], colorss[1]);
1702 LLVMBuildStore(builder, colors0[2], colorss[2]);
1703 LLVMBuildStore(builder, colors0[3], colorss[3]);
1704
1705 lp_build_else(&corner_if);
1706 }
1707
1708 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1709 if (is_gather) {
1710 /*
1711 * Just assign the red channel (no component selection yet).
1712 * This is a bit hackish, we usually do the swizzle at the
1713 * end of sampling (much less values to swizzle), but this
1714 * obviously cannot work when using gather.
1715 */
1716 colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1717 neighbors[1][0],
1718 chan_swiz);
1719 colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1720 neighbors[1][1],
1721 chan_swiz);
1722 colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1723 neighbors[0][1],
1724 chan_swiz);
1725 colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1726 neighbors[0][0],
1727 chan_swiz);
1728 } else {
1729 /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1730 lp_build_reduce_filter_2d(texel_bld,
1731 bld->static_sampler_state->reduction_mode,
1732 0,
1733 4,
1734 s_fpart,
1735 t_fpart,
1736 neighbors[0][0],
1737 neighbors[0][1],
1738 neighbors[1][0],
1739 neighbors[1][1],
1740 colors0);
1741 }
1742 } else {
1743 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1744 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1745 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1746 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1747 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1748
1749 if (is_gather) {
1750 /* more hacks for swizzling, should be X, ONE or ZERO... */
1751 colors0[0] = lp_build_select(texel_bld, cmpval10,
1752 texel_bld->one, texel_bld->zero);
1753 colors0[1] = lp_build_select(texel_bld, cmpval11,
1754 texel_bld->one, texel_bld->zero);
1755 colors0[2] = lp_build_select(texel_bld, cmpval01,
1756 texel_bld->one, texel_bld->zero);
1757 colors0[3] = lp_build_select(texel_bld, cmpval00,
1758 texel_bld->one, texel_bld->zero);
1759 } else {
1760 colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1761 cmpval00, cmpval01, cmpval10, cmpval11);
1762 colors0[1] = colors0[2] = colors0[3] = colors0[0];
1763 }
1764 }
1765
1766 if (have_corners && accurate_cube_corners &&
1767 bld->static_sampler_state->reduction_mode == PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE) {
1768 LLVMBuildStore(builder, colors0[0], colorss[0]);
1769 LLVMBuildStore(builder, colors0[1], colorss[1]);
1770 LLVMBuildStore(builder, colors0[2], colorss[2]);
1771 LLVMBuildStore(builder, colors0[3], colorss[3]);
1772
1773 lp_build_endif(&corner_if);
1774
1775 colors0[0] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[0], "");
1776 colors0[1] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[1], "");
1777 colors0[2] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[2], "");
1778 colors0[3] = LLVMBuildLoad2(builder, coord_bld->vec_type, colorss[3], "");
1779 }
1780
1781 if (dims == 3) {
1782 LLVMValueRef neighbors1[2][2][4];
1783 LLVMValueRef colors1[4];
1784
1785 assert(!is_gather);
1786
1787 /* get x0/x1/y0/y1 texels at z1 */
1788 lp_build_sample_texel_soa(bld,
1789 width_vec, height_vec, depth_vec,
1790 x00, y00, z1,
1791 row_stride_vec, img_stride_vec,
1792 data_ptr, mipoffsets, ilevel, neighbors1[0][0]);
1793 lp_build_sample_texel_soa(bld,
1794 width_vec, height_vec, depth_vec,
1795 x01, y01, z1,
1796 row_stride_vec, img_stride_vec,
1797 data_ptr, mipoffsets, ilevel, neighbors1[0][1]);
1798 lp_build_sample_texel_soa(bld,
1799 width_vec, height_vec, depth_vec,
1800 x10, y10, z1,
1801 row_stride_vec, img_stride_vec,
1802 data_ptr, mipoffsets, ilevel, neighbors1[1][0]);
1803 lp_build_sample_texel_soa(bld,
1804 width_vec, height_vec, depth_vec,
1805 x11, y11, z1,
1806 row_stride_vec, img_stride_vec,
1807 data_ptr, mipoffsets, ilevel, neighbors1[1][1]);
1808
1809 if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1810 /* Bilinear interpolate the four samples from the second Z slice */
1811 lp_build_reduce_filter_2d(texel_bld,
1812 bld->static_sampler_state->reduction_mode,
1813 0,
1814 4,
1815 s_fpart,
1816 t_fpart,
1817 neighbors1[0][0],
1818 neighbors1[0][1],
1819 neighbors1[1][0],
1820 neighbors1[1][1],
1821 colors1);
1822
1823 /* Linearly interpolate the two samples from the two 3D slices */
1824 lp_build_reduce_filter(texel_bld,
1825 bld->static_sampler_state->reduction_mode,
1826 0,
1827 4,
1828 r_fpart,
1829 colors0,
1830 colors1,
1831 colors_out);
1832 } else {
1833 LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1834 cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1835 cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1836 cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1837 cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1838 colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1839 cmpval00, cmpval01, cmpval10, cmpval11);
1840 /* Linearly interpolate the two samples from the two 3D slices */
1841 colors_out[0] = lp_build_lerp(texel_bld,
1842 r_fpart,
1843 colors0[0], colors1[0],
1844 0);
1845 colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1846 }
1847 } else {
1848 /* 2D tex */
1849 for (unsigned chan = 0; chan < 4; chan++) {
1850 colors_out[chan] = colors0[chan];
1851 }
1852 }
1853 }
1854 if (is_gather) {
1855 /*
1856 * For gather, we can't do our usual channel swizzling done later,
1857 * so do it here. It only really matters for 0/1 swizzles in case
1858 * of comparison filtering, since in this case the results would be
1859 * wrong, without comparison it should all work out alright but it
1860 * can't hurt to do that here, since it will instantly drop all
1861 * calculations above, though it's a rather stupid idea to do
1862 * gather on a channel which will always return 0 or 1 in any case...
1863 */
1864 if (chan_swiz == PIPE_SWIZZLE_1) {
1865 for (unsigned chan = 0; chan < 4; chan++) {
1866 colors_out[chan] = texel_bld->one;
1867 }
1868 } else if (chan_swiz == PIPE_SWIZZLE_0) {
1869 for (unsigned chan = 0; chan < 4; chan++) {
1870 colors_out[chan] = texel_bld->zero;
1871 }
1872 }
1873 }
1874 }
1875
1876
1877 /**
1878 * Sample the texture/mipmap using given image filter and mip filter.
1879 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1880 * from (vectors or scalars).
1881 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1882 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       bool is_gather,
                       const LLVMValueRef *coords,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   /* colors0/colors1: per-channel results from mip level 0 / level 1.
    * colors_out[chan] are variables (store destinations), not plain values. */
   LLVMValueRef colors0[4], colors1[4];

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      /* Single lod: can address the level's data directly. */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
      lp_build_sample_image_nearest(bld, size0,
                                    row_stride0_vec, img_stride0_vec,
                                    data_ptr0, mipoff0, ilevel0, coords, offsets,
                                    colors0);
   } else {
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
      lp_build_sample_image_linear(bld, is_gather, size0, NULL,
                                   row_stride0_vec, img_stride0_vec,
                                   data_ptr0, mipoff0, ilevel0, coords, offsets,
                                   colors0);
   }

   /* Store the first level's colors in the output variables */
   for (unsigned chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         /* Unordered compare: NaN lod_fpart also takes the lerp path. */
         need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
                                   lod_fpart, bld->lodf_bld.zero,
                                   "need_lerp");
      } else {
         /*
          * We'll do mip filtering if any of the quads (or individual
          * pixel in case of per-pixel lod) need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it (if there's one lod per quad).
          */
         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
                                      PIPE_FUNC_GREATER,
                                      lod_fpart, bld->lodf_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
         lp_build_name(need_lerp, "need_lerp");
      }

      /* Runtime branch: only fetch/filter the second level when needed. */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         } else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
            lp_build_sample_image_nearest(bld, size1,
                                          row_stride1_vec, img_stride1_vec,
                                          data_ptr1, mipoff1, ilevel1, coords, offsets,
                                          colors1);
         } else {
            /* note: is_gather is false here (gather doesn't mip-lerp) */
            lp_build_sample_image_linear(bld, false, size1, NULL,
                                         row_stride1_vec, img_stride1_vec,
                                         data_ptr1, mipoff1, ilevel1, coords, offsets,
                                         colors1);
         }

         /* interpolate samples from the two mipmap levels */

         /* Broadcast per-quad lod fractions up to full texel vector width. */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (unsigned chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
2010
2011
2012 /**
2013 * Sample the texture/mipmap using given mip filter, and using
2014 * both nearest and linear filtering at the same time depending
2015 * on linear_mask.
2016 * lod can be per quad but linear_mask is always per pixel.
2017 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
2018 * from (vectors or scalars).
2019 * If we're using nearest miplevel sampling the '1' values will be null/unused.
2020 */
static void
lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
                            LLVMValueRef linear_mask,
                            unsigned mip_filter,
                            const LLVMValueRef *coords,
                            const LLVMValueRef *offsets,
                            LLVMValueRef ilevel0,
                            LLVMValueRef ilevel1,
                            LLVMValueRef lod_fpart,
                            LLVMValueRef lod_positive,
                            LLVMValueRef *colors_out)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0 = NULL;
   LLVMValueRef size1 = NULL;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0 = NULL;
   LLVMValueRef data_ptr1 = NULL;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   /* colors0/colors1: per-channel results from the two mip levels. */
   LLVMValueRef colors0[4], colors1[4];

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_mips == 1) {
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   } else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /*
    * linear_mask selects nearest vs. linear per pixel inside the
    * linear sampling path, hence only one call covers both filters.
    */
   lp_build_sample_image_linear(bld, false, size0, linear_mask,
                                row_stride0_vec, img_stride0_vec,
                                data_ptr0, mipoff0, ilevel0, coords, offsets,
                                colors0);

   /* Store the first level's colors in the output variables */
   for (unsigned chan = 0; chan < 4; chan++) {
      LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
   }

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;

      /*
       * We'll do mip filtering if any of the quads (or individual
       * pixel in case of per-pixel lod) need it.
       * Note using lod_positive here not lod_fpart since it may be the same
       * condition as that used in the outer "if" in the caller hence llvm
       * should be able to merge the branches in this case.
       */
      need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
      lp_build_name(need_lerp, "need_lerp");

      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         /*
          * We unfortunately need to clamp lod_fpart here since we can get
          * negative values which would screw up filtering if not all
          * lod_fpart values have same sign.
          */
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
                                  bld->lodf_bld.zero);
         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_mips == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         } else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         lp_build_sample_image_linear(bld, false, size1, linear_mask,
                                      row_stride1_vec, img_stride1_vec,
                                      data_ptr1, mipoff1, ilevel1, coords, offsets,
                                      colors1);

         /* interpolate samples from the two mipmap levels */

         /* Broadcast per-quad lod fractions up to full texel vector width. */
         if (bld->num_lods != bld->coord_type.length)
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
                                                              bld->lodf_bld.type,
                                                              bld->texel_bld.type,
                                                              lod_fpart);

         for (unsigned chan = 0; chan < 4; chan++) {
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
                                          colors0[chan], colors1[chan],
                                          0);
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
         }
      }
      lp_build_endif(&if_ctx);
   }
}
2125
2126
2127 /**
2128 * Build (per-coord) layer value.
2129 * Either clamp layer to valid values or fill in optional out_of_bounds
2130 * value and just return value unclamped.
2131 */
2132 static LLVMValueRef
lp_build_layer_coord(struct lp_build_sample_context * bld,unsigned texture_unit,bool is_cube_array,LLVMValueRef layer,LLVMValueRef * out_of_bounds)2133 lp_build_layer_coord(struct lp_build_sample_context *bld,
2134 unsigned texture_unit,
2135 bool is_cube_array,
2136 LLVMValueRef layer,
2137 LLVMValueRef *out_of_bounds)
2138 {
2139 LLVMValueRef num_layers;
2140 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2141
2142 num_layers = bld->dynamic_state->depth(bld->gallivm, bld->resources_type,
2143 bld->resources_ptr, texture_unit, NULL);
2144 num_layers = LLVMBuildZExt(bld->gallivm->builder, num_layers,
2145 bld->int_bld.elem_type, "");
2146 if (out_of_bounds) {
2147 LLVMValueRef out1, out;
2148 assert(!is_cube_array);
2149 num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
2150 out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
2151 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
2152 *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
2153 return layer;
2154 } else {
2155 LLVMValueRef maxlayer;
2156 LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
2157 bld->int_bld.one;
2158 maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
2159 maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
2160 return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
2161 }
2162 }
2163
2164 static void
lp_build_sample_ms_offset(struct lp_build_context * int_coord_bld,LLVMValueRef ms_index,LLVMValueRef num_samples,LLVMValueRef sample_stride,LLVMValueRef * offset,LLVMValueRef * out_of_bounds)2165 lp_build_sample_ms_offset(struct lp_build_context *int_coord_bld,
2166 LLVMValueRef ms_index,
2167 LLVMValueRef num_samples,
2168 LLVMValueRef sample_stride,
2169 LLVMValueRef *offset,
2170 LLVMValueRef *out_of_bounds)
2171 {
2172 LLVMValueRef out1;
2173 num_samples = lp_build_broadcast_scalar(int_coord_bld, num_samples);
2174 sample_stride = lp_build_broadcast_scalar(int_coord_bld, sample_stride);
2175 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, ms_index, int_coord_bld->zero);
2176 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2177 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, ms_index, num_samples);
2178 *out_of_bounds = lp_build_or(int_coord_bld, *out_of_bounds, out1);
2179 LLVMValueRef sample_offset = lp_build_mul(int_coord_bld,
2180 sample_stride, ms_index);
2181 *offset = lp_build_add(int_coord_bld, *offset, sample_offset);
2182 }
2183
2184
2185 #define WEIGHT_LUT_SIZE 1024
2186
2187
2188 static void
lp_build_sample_aniso(struct lp_build_sample_context * bld,unsigned img_filter,unsigned mip_filter,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef lod_fpart,LLVMValueRef * colors_out)2189 lp_build_sample_aniso(struct lp_build_sample_context *bld,
2190 unsigned img_filter,
2191 unsigned mip_filter,
2192 bool is_gather,
2193 const LLVMValueRef *coords,
2194 const LLVMValueRef *offsets,
2195 LLVMValueRef ilevel0,
2196 LLVMValueRef ilevel1,
2197 LLVMValueRef lod_fpart,
2198 LLVMValueRef *colors_out)
2199 {
2200 struct gallivm_state *gallivm = bld->gallivm;
2201 LLVMBuilderRef builder = gallivm->builder;
2202 struct lp_build_context *coord_bld = &bld->coord_bld;
2203 struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
2204 LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(&bld->coord_bld, coords[0], coords[1]);
2205 LLVMValueRef float_size;
2206 LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2207 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
2208 LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
2209 const unsigned length = bld->coord_bld.type.length;
2210 const unsigned num_quads = length / 4;
2211 LLVMValueRef filter_table = bld->aniso_filter_table;
2212 LLVMValueRef size0, row_stride0_vec, img_stride0_vec;
2213 LLVMValueRef data_ptr0, mipoff0 = NULL;
2214
2215 lp_build_mipmap_level_sizes(bld, ilevel0,
2216 &size0,
2217 &row_stride0_vec, &img_stride0_vec);
2218 if (bld->num_mips == 1) {
2219 data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
2220 } else {
2221 /* This path should work for num_lods 1 too but slightly less efficient */
2222 data_ptr0 = bld->base_ptr;
2223 mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
2224 }
2225
2226 float_size = lp_build_int_to_float(&bld->float_size_in_bld, bld->int_size);
2227
2228 LLVMValueRef float_size_lvl = lp_build_int_to_float(&bld->float_size_bld, size0);
2229 /* extract width and height into vectors for use later */
2230 static const unsigned char swizzle15[] = { /* no-op swizzle */
2231 1, 1, 1, 1, 5, 5, 5, 5
2232 };
2233 static const unsigned char swizzle04[] = { /* no-op swizzle */
2234 0, 0, 0, 0, 4, 4, 4, 4
2235 };
2236 LLVMValueRef width_dim, height_dim;
2237
2238 width_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle04,
2239 bld->float_size_bld.type.length,
2240 bld->coord_bld.type.length);
2241 height_dim = lp_build_swizzle_aos_n(gallivm, float_size_lvl, swizzle15,
2242 bld->float_size_bld.type.length,
2243 bld->coord_bld.type.length);
2244
2245
2246 /* shuffle width/height for ddx/ddy calculations. */
2247 LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];
2248
2249 for (unsigned i = 0; i < num_quads; i++) {
2250 shuffles[i*4+0] = shuffles[i*4+1] = index0;
2251 shuffles[i*4+2] = shuffles[i*4+3] = index1;
2252 }
2253
2254 LLVMValueRef floatdim =
2255 LLVMBuildShuffleVector(builder, float_size, float_size,
2256 LLVMConstVector(shuffles, length), "");
2257
2258 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
2259
2260 LLVMValueRef scaling =
2261 lp_build_shl(&bld->leveli_bld, bld->leveli_bld.one, ilevel0);
2262 scaling = lp_build_int_to_float(&bld->levelf_bld, scaling);
2263 scaling = lp_build_rcp(&bld->levelf_bld, scaling);
2264
2265 if (bld->levelf_bld.type.length != length) {
2266 if (bld->levelf_bld.type.length == 1) {
2267 scaling = lp_build_broadcast_scalar(coord_bld,
2268 scaling);
2269 } else {
2270 scaling = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2271 bld->levelf_bld.type,
2272 coord_bld->type,
2273 scaling);
2274 }
2275 }
2276
2277 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, scaling);
2278
2279 static const unsigned char swizzle01[] = { /* no-op swizzle */
2280 0, 1, 0, 1,
2281 };
2282 static const unsigned char swizzle23[] = {
2283 2, 3, 2, 3,
2284 };
2285
2286 LLVMValueRef ddx_ddys, ddx_ddyt;
2287 ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
2288 ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
2289
2290 /* compute ellipse coefficients */
2291 /* * A*x*x + B*x*y + C*y*y = F.*/
2292 /* float A = vx*vx+vy*vy+1; */
2293 LLVMValueRef A = lp_build_mul(coord_bld, ddx_ddyt, ddx_ddyt);
2294
2295 LLVMValueRef Ay = lp_build_swizzle_aos(coord_bld, A, swizzle15);
2296 A = lp_build_add(coord_bld, A, Ay);
2297 A = lp_build_add(coord_bld, A, coord_bld->one);
2298 A = lp_build_swizzle_aos(coord_bld, A, swizzle04);
2299
2300 /* float B = -2*(ux*vx+uy*vy); */
2301 LLVMValueRef B = lp_build_mul(coord_bld, ddx_ddys, ddx_ddyt);
2302 LLVMValueRef By = lp_build_swizzle_aos(coord_bld, B, swizzle15);
2303 B = lp_build_add(coord_bld, B, By);
2304 B = lp_build_mul_imm(coord_bld, B, -2);
2305 B = lp_build_swizzle_aos(coord_bld, B, swizzle04);
2306
2307 /* float C = ux*ux+uy*uy+1; */
2308 LLVMValueRef C = lp_build_mul(coord_bld, ddx_ddys, ddx_ddys);
2309 LLVMValueRef Cy = lp_build_swizzle_aos(coord_bld, C, swizzle15);
2310 C = lp_build_add(coord_bld, C, Cy);
2311 C = lp_build_add(coord_bld, C, coord_bld->one);
2312 C = lp_build_swizzle_aos(coord_bld, C, swizzle04);
2313
2314 /* float F = A*C-B*B/4.0f; */
2315 LLVMValueRef F = lp_build_mul(coord_bld, B, B);
2316 F = lp_build_div(coord_bld, F, lp_build_const_vec(gallivm, coord_bld->type, 4.0));
2317 LLVMValueRef F_p2 = lp_build_mul(coord_bld, A, C);
2318 F = lp_build_sub(coord_bld, F_p2, F);
2319
2320 /* compute ellipse bounding box in texture space */
2321 /* const float d = -B*B+4.0f*C*A; */
2322 LLVMValueRef d = lp_build_sub(coord_bld, coord_bld->zero, lp_build_mul(coord_bld, B, B));
2323 LLVMValueRef d_p2 = lp_build_mul(coord_bld, A, C);
2324 d_p2 = lp_build_mul_imm(coord_bld, d_p2, 4);
2325 d = lp_build_add(coord_bld, d, d_p2);
2326
2327 /* const float box_u = 2.0f / d * sqrtf(d*C*F); */
2328 /* box_u -> half of bbox with */
2329 LLVMValueRef temp;
2330 temp = lp_build_mul(coord_bld, d, C);
2331 temp = lp_build_mul(coord_bld, temp, F);
2332 temp = lp_build_sqrt(coord_bld, temp);
2333
2334 LLVMValueRef box_u = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2335 box_u = lp_build_mul(coord_bld, box_u, temp);
2336
2337 /* const float box_v = 2.0f / d * sqrtf(A*d*F); */
2338 /* box_v -> half of bbox height */
2339 temp = lp_build_mul(coord_bld, A, d);
2340 temp = lp_build_mul(coord_bld, temp, F);
2341 temp = lp_build_sqrt(coord_bld, temp);
2342
2343 LLVMValueRef box_v = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, 2.0), d);
2344 box_v = lp_build_mul(coord_bld, box_v, temp);
2345
2346 /* Scale ellipse formula to directly index the Filter Lookup Table.
2347 * i.e. scale so that F = WEIGHT_LUT_SIZE-1
2348 */
2349 LLVMValueRef formScale = lp_build_div(coord_bld, lp_build_const_vec(gallivm, coord_bld->type, WEIGHT_LUT_SIZE - 1), F);
2350
2351 A = lp_build_mul(coord_bld, A, formScale);
2352 B = lp_build_mul(coord_bld, B, formScale);
2353 C = lp_build_mul(coord_bld, C, formScale);
2354 /* F *= formScale; */ /* no need to scale F as we don't use it below here */
2355
2356 LLVMValueRef ddq = lp_build_mul_imm(coord_bld, A, 2);
2357
2358 /* Heckbert MS thesis, p. 59; scan over the bounding box of the ellipse
2359 * and incrementally update the value of Ax^2+Bxy*Cy^2; when this
2360 * value, q, is less than F, we're inside the ellipse
2361 */
2362
2363 LLVMValueRef float_size0 = lp_build_int_to_float(float_size_bld, bld->int_size);
2364 LLVMValueRef width0 = lp_build_extract_broadcast(gallivm,
2365 float_size_bld->type,
2366 coord_bld->type,
2367 float_size0, index0);
2368 LLVMValueRef height0 = lp_build_extract_broadcast(gallivm,
2369 float_size_bld->type,
2370 coord_bld->type,
2371 float_size0, index1);
2372
2373 /* texture->width0 * scaling */
2374 width0 = lp_build_mul(coord_bld, width0, scaling);
2375 /* texture->height0 * scaling */
2376 height0 = lp_build_mul(coord_bld, height0, scaling);
2377
2378 /* tex_u = -0.5f * s[j] * texture->width0 * scaling */
2379 LLVMValueRef tex_u = lp_build_mul(coord_bld, coords[0], width0);
2380 tex_u = lp_build_add(coord_bld, tex_u, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2381
2382 /* tex_v = -0.5f * t[j] * texture->height0 * scaling */
2383 LLVMValueRef tex_v = lp_build_mul(coord_bld, coords[1], height0);
2384 tex_v = lp_build_add(coord_bld, tex_v, lp_build_const_vec(gallivm, coord_bld->type, -0.5f));
2385
2386 /* const int u0 = (int) floorf(tex_u - box_u); */
2387 LLVMValueRef u0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_u, box_u)));
2388 /* const int u1 = (int) ceilf(tex_u + box_u); */
2389 LLVMValueRef u1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_u, box_u)));
2390
2391 /* const int v0 = (int) floorf(tex_v - box_v); */
2392 LLVMValueRef v0 = lp_build_itrunc(coord_bld, lp_build_floor(coord_bld, lp_build_sub(coord_bld, tex_v, box_v)));
2393 /* const int v1 = (int) ceilf(tex_v + box_v); */
2394 LLVMValueRef v1 = lp_build_itrunc(coord_bld, lp_build_ceil(coord_bld, lp_build_add(coord_bld, tex_v, box_v)));
2395
2396 /* const float U = u0 - tex_u; */
2397 LLVMValueRef U = lp_build_sub(coord_bld, lp_build_int_to_float(coord_bld, u0), tex_u);
2398
2399 /* A * (2 * U + 1) */
2400 LLVMValueRef dq_base = lp_build_mul_imm(coord_bld, U, 2);
2401 dq_base = lp_build_add(coord_bld, dq_base, coord_bld->one);
2402 dq_base = lp_build_mul(coord_bld, dq_base, A);
2403
2404 /* A * U * U */
2405 LLVMValueRef q_base = lp_build_mul(coord_bld, U, U);
2406 q_base = lp_build_mul(coord_bld, q_base, A);
2407
2408 LLVMValueRef colors0[4];
2409 LLVMValueRef den_store = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "den");
2410
2411 for (unsigned chan = 0; chan < 4; chan++)
2412 colors0[chan] = lp_build_alloca(gallivm, bld->texel_bld.vec_type, "colors");
2413
2414 LLVMValueRef q_store, dq_store;
2415 q_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "q");
2416 dq_store = lp_build_alloca(gallivm, bld->coord_bld.vec_type, "dq");
2417
2418 LLVMValueRef v_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "v_limiter");
2419 LLVMValueRef u_limiter = lp_build_alloca(gallivm, bld->int_coord_bld.vec_type, "u_limiter");
2420
2421 LLVMBuildStore(builder, v0, v_limiter);
2422
2423 /* create an LLVM loop block for the V iterator */
2424 LLVMBasicBlockRef v_loop_block = lp_build_insert_new_block(gallivm, "vloop");
2425
2426 LLVMBuildBr(builder, v_loop_block);
2427 LLVMPositionBuilderAtEnd(builder, v_loop_block);
2428
2429 LLVMValueRef v_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, v_limiter, "");
2430 LLVMValueRef v_mask = LLVMBuildICmp(builder, LLVMIntSLE, v_val, v1, "");
2431
2432 /* loop over V values. */
2433 {
2434 /* const float V = v - tex_v; */
2435 LLVMValueRef V =
2436 lp_build_sub(coord_bld,
2437 lp_build_int_to_float(coord_bld, v_val), tex_v);
2438
2439 /* float dq = dq_base + B * V; */
2440 LLVMValueRef dq = lp_build_mul(coord_bld, V, B);
2441 dq = lp_build_add(coord_bld, dq, dq_base);
2442
2443 /* float q = (C * V + B * U) * V + q_base */
2444 LLVMValueRef q = lp_build_mul(coord_bld, C, V);
2445 q = lp_build_add(coord_bld, q, lp_build_mul(coord_bld, B, U));
2446 q = lp_build_mul(coord_bld, q, V);
2447 q = lp_build_add(coord_bld, q, q_base);
2448
2449 LLVMBuildStore(builder, q, q_store);
2450 LLVMBuildStore(builder, dq, dq_store);
2451
2452 LLVMBuildStore(builder, u0, u_limiter);
2453
2454 /* create an LLVM loop block for the V iterator */
2455 LLVMBasicBlockRef u_loop_block = lp_build_insert_new_block(gallivm, "uloop");
2456
2457 LLVMBuildBr(builder, u_loop_block);
2458 LLVMPositionBuilderAtEnd(builder, u_loop_block);
2459
2460 LLVMValueRef u_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type,
2461 u_limiter, "");
2462 LLVMValueRef u_mask = LLVMBuildICmp(builder,
2463 LLVMIntSLE,
2464 u_val,
2465 u1, "");
2466
2467 /* loop over U values */
2468 {
2469 /* q = (int)q */
2470 q = lp_build_itrunc(coord_bld,
2471 LLVMBuildLoad2(builder, bld->coord_bld.vec_type,
2472 q_store, ""));
2473
2474 /*
2475 * avoid OOB access to filter table, generate a mask for q > 1024,
2476 * then truncate it.
2477 */
2478 LLVMValueRef q_mask = LLVMBuildICmp(builder,
2479 LLVMIntSLE,
2480 q,
2481 lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff), "");
2482 q_mask = LLVMBuildSExt(builder, q_mask, bld->int_coord_bld.vec_type, "");
2483
2484 q = lp_build_max(&bld->int_coord_bld, q, bld->int_coord_bld.zero);
2485 q = lp_build_and(&bld->int_coord_bld, q, lp_build_const_int_vec(gallivm, bld->int_coord_bld.type, 0x3ff));
2486
2487 /* update the offsets to deal with float size. */
2488 q = lp_build_mul_imm(&bld->int_coord_bld, q, 4);
2489 filter_table = LLVMBuildBitCast(gallivm->builder, filter_table, LLVMPointerType(LLVMInt8TypeInContext(gallivm->context), 0), "");
2490
2491 /* Lookup weights in filter table */
2492 LLVMValueRef weights = lp_build_gather(gallivm, coord_bld->type.length,
2493 coord_bld->type.width,
2494 lp_elem_type(coord_bld->type),
2495 true, filter_table, q, true);
2496
2497 /*
2498 * Mask off the weights here which should ensure no-op for loops
2499 * where some of the u/v values are not being calculated.
2500 */
2501 weights = LLVMBuildBitCast(builder, weights, bld->int_coord_bld.vec_type, "");
2502 weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, ""));
2503 weights = lp_build_and(&bld->int_coord_bld, weights, LLVMBuildSExt(builder, v_mask, bld->int_coord_bld.vec_type, ""));
2504 weights = lp_build_and(&bld->int_coord_bld, weights, q_mask);
2505 weights = LLVMBuildBitCast(builder, weights, bld->coord_bld.vec_type, "");
2506
2507 /* if the weights are all 0 avoid doing the sampling at all. */
2508 struct lp_build_if_state noloadw0;
2509
2510 LLVMValueRef wnz = LLVMBuildFCmp(gallivm->builder, LLVMRealUNE,
2511 weights, bld->coord_bld.zero, "");
2512 wnz = LLVMBuildSExt(builder, wnz, bld->int_coord_bld.vec_type, "");
2513 wnz = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, wnz);
2514 lp_build_if(&noloadw0, gallivm, wnz);
2515 LLVMValueRef new_coords[4];
2516 new_coords[0] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, u_val), width_dim);
2517 new_coords[1] = lp_build_div(coord_bld, lp_build_int_to_float(coord_bld, v_val), height_dim);
2518 new_coords[2] = coords[2];
2519 new_coords[3] = coords[3];
2520
2521 /* lookup q in filter table */
2522 LLVMValueRef temp_colors[4];
2523 lp_build_sample_image_nearest(bld, size0,
2524 row_stride0_vec, img_stride0_vec,
2525 data_ptr0, mipoff0, ilevel0, new_coords, offsets,
2526 temp_colors);
2527
2528 for (unsigned chan = 0; chan < 4; chan++) {
2529 LLVMValueRef tcolor = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, colors0[chan], "");
2530
2531 tcolor = lp_build_add(&bld->texel_bld, tcolor, lp_build_mul(&bld->texel_bld, temp_colors[chan], weights));
2532 LLVMBuildStore(builder, tcolor, colors0[chan]);
2533 }
2534
2535 /* multiple colors by weight and add in. */
2536 /* den += weight; */
2537 LLVMValueRef den = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, den_store, "");
2538 den = lp_build_add(&bld->texel_bld, den, weights);
2539 LLVMBuildStore(builder, den, den_store);
2540
2541 lp_build_endif(&noloadw0);
2542 /* q += dq; */
2543 /* dq += ddq; */
2544 q = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, q_store, "");
2545 dq = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, dq_store, "");
2546 q = lp_build_add(coord_bld, q, dq);
2547 dq = lp_build_add(coord_bld, dq, ddq);
2548 LLVMBuildStore(builder, q, q_store);
2549 LLVMBuildStore(builder, dq, dq_store);
2550 }
2551 /* u += 1 */
2552 u_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, u_limiter, "");
2553 u_val = lp_build_add(&bld->int_coord_bld, u_val, bld->int_coord_bld.one);
2554 LLVMBuildStore(builder, u_val, u_limiter);
2555
2556 u_mask = LLVMBuildICmp(builder,
2557 LLVMIntSLE,
2558 u_val,
2559 u1, "");
2560 LLVMValueRef u_end_cond = LLVMBuildSExt(builder, u_mask, bld->int_coord_bld.vec_type, "");
2561 u_end_cond = lp_build_any_true_range(&bld->coord_bld, bld->coord_bld.type.length, u_end_cond);
2562
2563 LLVMBasicBlockRef u_end_loop = lp_build_insert_new_block(gallivm, "u_end_loop");
2564
2565 LLVMBuildCondBr(builder, u_end_cond,
2566 u_loop_block, u_end_loop);
2567
2568 LLVMPositionBuilderAtEnd(builder, u_end_loop);
2569
2570 }
2571
2572 /* v += 1 */
2573 v_val = LLVMBuildLoad2(builder, bld->int_coord_bld.vec_type, v_limiter, "");
2574 v_val = lp_build_add(&bld->int_coord_bld, v_val, bld->int_coord_bld.one);
2575 LLVMBuildStore(builder, v_val, v_limiter);
2576
2577 v_mask = LLVMBuildICmp(builder,
2578 LLVMIntSLE,
2579 v_val,
2580 v1, "");
2581 LLVMValueRef v_end_cond = LLVMBuildSExt(builder, v_mask,
2582 bld->int_coord_bld.vec_type, "");
2583 v_end_cond = lp_build_any_true_range(&bld->coord_bld,
2584 bld->coord_bld.type.length, v_end_cond);
2585
2586 LLVMBasicBlockRef v_end_loop = lp_build_insert_new_block(gallivm, "v_end_loop");
2587
2588 LLVMBuildCondBr(builder, v_end_cond,
2589 v_loop_block, v_end_loop);
2590
2591 LLVMPositionBuilderAtEnd(builder, v_end_loop);
2592
2593 LLVMValueRef den = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, den_store, "");
2594
2595 for (unsigned chan = 0; chan < 4; chan++) {
2596 colors0[chan] =
2597 lp_build_div(&bld->texel_bld,
2598 LLVMBuildLoad2(builder, bld->texel_bld.vec_type,
2599 colors0[chan], ""), den);
2600 }
2601
2602 LLVMValueRef den0 = lp_build_cmp(&bld->coord_bld, PIPE_FUNC_EQUAL,
2603 den, bld->coord_bld.zero);
2604
2605 LLVMValueRef den0_any =
2606 lp_build_any_true_range(&bld->coord_bld,
2607 bld->coord_bld.type.length, den0);
2608
2609 struct lp_build_if_state den0_fallback;
2610 lp_build_if(&den0_fallback, gallivm, den0_any);
2611 {
2612 LLVMValueRef colors_den0[4];
2613 lp_build_sample_image_linear(bld, false, size0, NULL,
2614 row_stride0_vec, img_stride0_vec,
2615 data_ptr0, mipoff0, ilevel0, coords, offsets,
2616 colors_den0);
2617 for (unsigned chan = 0; chan < 4; chan++) {
2618 LLVMValueRef chan_val =
2619 lp_build_select(&bld->texel_bld, den0,
2620 colors_den0[chan], colors0[chan]);
2621 LLVMBuildStore(builder, chan_val, colors_out[chan]);
2622 }
2623 }
2624 lp_build_else(&den0_fallback);
2625 {
2626 for (unsigned chan = 0; chan < 4; chan++) {
2627 LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
2628 }
2629 }
2630 lp_build_endif(&den0_fallback);
2631 }
2632
2633
2634 /**
2635 * Calculate cube face, lod, mip levels.
2636 */
2637 static void
lp_build_sample_common(struct lp_build_sample_context * bld,bool is_lodq,unsigned texture_index,unsigned sampler_index,LLVMValueRef * coords,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,LLVMValueRef * lod_pos_or_zero,LLVMValueRef * lod,LLVMValueRef * lod_fpart,LLVMValueRef * ilevel0,LLVMValueRef * ilevel1)2638 lp_build_sample_common(struct lp_build_sample_context *bld,
2639 bool is_lodq,
2640 unsigned texture_index,
2641 unsigned sampler_index,
2642 LLVMValueRef *coords,
2643 const struct lp_derivatives *derivs, /* optional */
2644 LLVMValueRef lod_bias, /* optional */
2645 LLVMValueRef explicit_lod, /* optional */
2646 LLVMValueRef *lod_pos_or_zero,
2647 LLVMValueRef *lod,
2648 LLVMValueRef *lod_fpart,
2649 LLVMValueRef *ilevel0,
2650 LLVMValueRef *ilevel1)
2651 {
2652 const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
2653 const unsigned min_filter = bld->static_sampler_state->min_img_filter;
2654 const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
2655 const unsigned target = bld->static_texture_state->target;
2656 const bool aniso = bld->static_sampler_state->aniso;
2657 LLVMValueRef first_level, last_level;
2658 LLVMValueRef lod_ipart = NULL;
2659 struct lp_derivatives cube_derivs;
2660
2661 /*
2662 printf("%s mip %d min %d mag %d\n", __func__,
2663 mip_filter, min_filter, mag_filter);
2664 */
2665
2666 first_level = get_first_level(bld->gallivm,
2667 bld->resources_type,
2668 bld->resources_ptr,
2669 texture_index, NULL,
2670 bld->static_texture_state,
2671 bld->dynamic_state);
2672 last_level = get_last_level(bld->gallivm,
2673 bld->resources_type,
2674 bld->resources_ptr,
2675 texture_index, NULL,
2676 bld->static_texture_state,
2677 bld->dynamic_state);
2678
2679 /*
2680 * Choose cube face, recompute texcoords for the chosen face and
2681 * calculate / transform derivatives.
2682 */
2683 if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
2684 bool need_derivs = ((min_filter != mag_filter ||
2685 mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
2686 !bld->static_sampler_state->min_max_lod_equal &&
2687 !explicit_lod);
2688 lp_build_cube_lookup(bld, coords, derivs, &cube_derivs, need_derivs);
2689 if (need_derivs)
2690 derivs = &cube_derivs;
2691
2692 if (target == PIPE_TEXTURE_CUBE_ARRAY && !is_lodq) {
2693 /* calculate cube layer coord now */
2694 LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
2695 LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
2696 layer = lp_build_mul(&bld->int_coord_bld, layer, six);
2697 coords[3] = lp_build_layer_coord(bld, texture_index, true, layer, NULL);
2698 /* because of seamless filtering can't add it to face (coords[2]) here. */
2699 }
2700 } else if ((target == PIPE_TEXTURE_1D_ARRAY ||
2701 target == PIPE_TEXTURE_2D_ARRAY) && !is_lodq) {
2702 coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
2703 coords[2] = lp_build_layer_coord(bld, texture_index, false, coords[2], NULL);
2704 }
2705
2706 if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
2707 /*
2708 * Clamp p coords to [0,1] for fixed function depth texture format here.
2709 * Technically this is not entirely correct for unorm depth as the ref
2710 * value should be converted to the depth format (quantization!) and
2711 * comparison then done in texture format. This would actually help
2712 * performance (since only need to do it once and could save the
2713 * per-sample conversion of texels to floats instead), but it would need
2714 * more messy code (would need to push at least some bits down to actual
2715 * fetch so conversion could be skipped, and would have ugly interaction
2716 * with border color, would need to convert border color to that format
2717 * too or do some other tricks to make it work).
2718 */
2719 const struct util_format_description *format_desc = bld->format_desc;
2720 /* not entirely sure we couldn't end up with non-valid swizzle here */
2721 const enum util_format_type chan_type =
2722 format_desc->swizzle[0] <= PIPE_SWIZZLE_W
2723 ? format_desc->channel[format_desc->swizzle[0]].type
2724 : UTIL_FORMAT_TYPE_FLOAT;
2725 if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
2726 coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
2727 bld->coord_bld.zero, bld->coord_bld.one);
2728 }
2729 }
2730
2731 /*
2732 * Compute the level of detail (float).
2733 */
2734 if (min_filter != mag_filter ||
2735 mip_filter != PIPE_TEX_MIPFILTER_NONE || is_lodq) {
2736 LLVMValueRef max_aniso = NULL;
2737
2738 if (aniso)
2739 max_aniso = bld->dynamic_state->max_aniso(bld->gallivm,
2740 bld->resources_type,
2741 bld->resources_ptr,
2742 sampler_index);
2743
2744 /* Need to compute lod either to choose mipmap levels or to
2745 * distinguish between minification/magnification with one mipmap level.
2746 */
2747 LLVMValueRef first_level_vec =
2748 lp_build_broadcast_scalar(&bld->int_size_in_bld, first_level);
2749 lp_build_lod_selector(bld, is_lodq, sampler_index,
2750 first_level_vec,
2751 coords[0], coords[1], coords[2],
2752 derivs, lod_bias, explicit_lod,
2753 mip_filter, max_aniso, lod,
2754 &lod_ipart, lod_fpart, lod_pos_or_zero);
2755 if (is_lodq) {
2756 last_level = lp_build_sub(&bld->int_bld, last_level, first_level);
2757 last_level = lp_build_int_to_float(&bld->float_bld, last_level);
2758 last_level = lp_build_broadcast_scalar(&bld->lodf_bld, last_level);
2759
2760 switch (mip_filter) {
2761 case PIPE_TEX_MIPFILTER_NONE:
2762 *lod_fpart = bld->lodf_bld.zero;
2763 break;
2764 case PIPE_TEX_MIPFILTER_NEAREST:
2765 *lod_fpart = lp_build_round(&bld->lodf_bld, *lod_fpart);
2766 FALLTHROUGH;
2767 case PIPE_TEX_MIPFILTER_LINEAR:
2768 *lod_fpart = lp_build_clamp(&bld->lodf_bld, *lod_fpart,
2769 bld->lodf_bld.zero, last_level);
2770 break;
2771 }
2772 return;
2773 }
2774 } else {
2775 lod_ipart = bld->lodi_bld.zero;
2776 *lod_pos_or_zero = bld->lodi_bld.zero;
2777 }
2778
2779 if ((bld->num_lods != bld->num_mips || bld->num_lods == 1) &&
2780 bld->lodi_bld.type.length != 1) {
2781 /* only makes sense if there's just a single mip level */
2782 assert(bld->num_mips == 1);
2783 lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
2784 }
2785
2786 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
2787 last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
2788
2789 /*
2790 * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
2791 */
2792
2793 if (aniso) {
2794 lp_build_nearest_mip_level(bld,
2795 first_level, last_level,
2796 lod_ipart, ilevel0, NULL);
2797 return;
2798 }
2799
2800 switch (mip_filter) {
2801 default:
2802 unreachable("Bad mip_filter value in lp_build_sample_soa()");
2803 case PIPE_TEX_MIPFILTER_NONE:
2804 /* always use mip level 0 */
2805 *ilevel0 = first_level;
2806 break;
2807 case PIPE_TEX_MIPFILTER_NEAREST:
2808 assert(lod_ipart);
2809 lp_build_nearest_mip_level(bld,
2810 first_level, last_level,
2811 lod_ipart, ilevel0, NULL);
2812 break;
2813 case PIPE_TEX_MIPFILTER_LINEAR:
2814 assert(lod_ipart);
2815 assert(*lod_fpart);
2816
2817 lp_build_linear_mip_levels(bld, texture_index,
2818 first_level, last_level,
2819 lod_ipart, lod_fpart,
2820 ilevel0, ilevel1);
2821 break;
2822 }
2823 }
2824
2825
/**
 * Load the sampler's border color and clamp it to the representable range
 * of the texture format, storing the result in bld->border_color_clamped.
 *
 * The clamp range is derived from the format description (normalized
 * signed/unsigned, pure integer channel width, etc.), with special cases
 * for mixed-sign plain formats and for compressed/subsampled layouts
 * where the channel description can't be used directly.
 *
 * \param sampler_unit  index used to look up the dynamic border color state
 */
static void
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
                            unsigned sampler_unit)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef border_color_ptr =
      bld->dynamic_state->border_color(gallivm,
                                       bld->resources_type,
                                       bld->resources_ptr, sampler_unit);
   LLVMValueRef border_color;
   const struct util_format_description *format_desc = bld->format_desc;
   struct lp_type vec4_type = bld->texel_type;
   struct lp_build_context vec4_bld;
   LLVMValueRef min_clamp = NULL;
   LLVMValueRef max_clamp = NULL;

   /*
    * For normalized format need to clamp border color (technically
    * probably should also quantize the data). Really sucks doing this
    * here but can't avoid at least for now since this is part of
    * sampler state and texture format is part of sampler_view state.
    * GL expects also expects clamping for uint/sint formats too so
    * do that as well (d3d10 can't end up here with uint/sint since it
    * only supports them with ld).
    */
   /* work on a 4-wide vector (one value per channel), not the texel vector */
   vec4_type.length = 4;
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);

   /*
    * Vectorized clamping of border color. Loading is a bit of a hack since
    * we just cast the pointer to float array to pointer to vec4
    * (int or float).
    */
   LLVMTypeRef border_color_type = LLVMArrayType(LLVMFloatTypeInContext(gallivm->context), 4);
   border_color_ptr = lp_build_array_get_ptr2(gallivm, border_color_type, border_color_ptr,
                                              lp_build_const_int32(gallivm, 0));
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
   border_color = LLVMBuildLoad2(builder, vec4_bld.vec_type, border_color_ptr, "");
   /* we don't have aligned type in the dynamic state unfortunately */
   LLVMSetAlignment(border_color, 4);

   /*
    * Instead of having some incredibly complex logic which will try to figure
    * out clamping necessary for each channel, simply use the first channel,
    * and treat mixed signed/unsigned normalized formats specially. (Mixed
    * non-normalized, which wouldn't work at all here, do not exist for a good
    * reason.)
    */
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
      int chan;
      /* d/s needs special handling because both present means just sampling depth */
      if (util_format_is_depth_and_stencil(format_desc->format)) {
         chan = format_desc->swizzle[0];
      } else {
         chan = util_format_get_first_non_void_channel(format_desc->format);
      }
      if (chan >= 0 && chan <= PIPE_SWIZZLE_W) {
         unsigned chan_type = format_desc->channel[chan].type;
         unsigned chan_norm = format_desc->channel[chan].normalized;
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
            if (chan_norm) {
               /* snorm: clamp to [-1, 1] */
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
               max_clamp = vec4_bld.one;
            } else if (chan_pure) {
               /*
                * Border color was stored as int, hence need min/max clamp
                * only if chan has less than 32 bits..
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     0 - (1 << (chan_size - 1)));
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << (chan_size - 1)) - 1);
               }
            }
            /* TODO: no idea about non-pure, non-normalized! */
         } else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
            if (chan_norm) {
               /* unorm: clamp to [0, 1] */
               min_clamp = vec4_bld.zero;
               max_clamp = vec4_bld.one;
            } else if (chan_pure) {
               /*
                * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
                * we use Z32_FLOAT_S8X24 to imply sampling depth component and
                * ignoring stencil, which will blow up here if we try to do a
                * uint clamp in a float texel build... And even if we had
                * that format, mesa st also thinks using z24s8 means depth
                * sampling ignoring stencil.
                */

               /*
                * Border color was stored as uint, hence never need min clamp,
                * and only need max clamp if chan has less than 32 bits.
                */
               unsigned chan_size = format_desc->channel[chan].size;
               if (chan_size < 32) {
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
                                                     (1 << chan_size) - 1);
               }
               /* TODO: no idea about non-pure, non-normalized! */
            }
         } else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
            /* TODO: I have no idea what clamp this would need if any! */
         }
      }
      /* mixed plain formats (or different pure size) */
      switch (format_desc->format) {
      case PIPE_FORMAT_B10G10R10A2_UINT:
      case PIPE_FORMAT_R10G10B10A2_UINT:
         {
            unsigned max10 = (1 << 10) - 1;
            /* 10-bit rgb channels, 2-bit alpha */
            max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
                                           max10, (1 << 2) - 1, NULL);
         }
         break;
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
         /* snorm rgb, unorm alpha */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        -1.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
      case PIPE_FORMAT_R5SG5SB6U_NORM:
         /* snorm rg, unorm b */
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
                                        0.0F, 0.0F, NULL);
         max_clamp = vec4_bld.one;
         break;
      default:
         break;
      }
   } else {
      /* cannot figure this out from format description */
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
         /* s3tc formats are always unorm */
         min_clamp = vec4_bld.zero;
         max_clamp = vec4_bld.one;
      } else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
                 format_desc->layout == UTIL_FORMAT_LAYOUT_ETC ||
                 format_desc->layout == UTIL_FORMAT_LAYOUT_BPTC) {
         switch (format_desc->format) {
         case PIPE_FORMAT_RGTC1_UNORM:
         case PIPE_FORMAT_RGTC2_UNORM:
         case PIPE_FORMAT_LATC1_UNORM:
         case PIPE_FORMAT_LATC2_UNORM:
         case PIPE_FORMAT_ETC1_RGB8:
         case PIPE_FORMAT_BPTC_RGBA_UNORM:
         case PIPE_FORMAT_BPTC_SRGBA:
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_RGTC1_SNORM:
         case PIPE_FORMAT_RGTC2_SNORM:
         case PIPE_FORMAT_LATC1_SNORM:
         case PIPE_FORMAT_LATC2_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_BPTC_RGB_FLOAT:
            /* not sure if we should clamp to max half float? */
            break;
         case PIPE_FORMAT_BPTC_RGB_UFLOAT:
            min_clamp = vec4_bld.zero;
            break;
         default:
            assert(0);
            break;
         }
      } else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
         /*
          * all others from subsampled/other group, though we don't care
          * about yuv (and should not have any from zs here)
          */
         switch (format_desc->format) {
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
         case PIPE_FORMAT_G8B8_G8R8_UNORM:
         case PIPE_FORMAT_B8G8_R8G8_UNORM:
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
            min_clamp = vec4_bld.zero;
            max_clamp = vec4_bld.one;
            break;
         case PIPE_FORMAT_R8G8Bx_SNORM:
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
            max_clamp = vec4_bld.one;
            break;
         /*
          * Note smallfloat formats usually don't need clamping
          * (they still have infinite range) however this is not
          * true for r11g11b10 and r9g9b9e5, which can't represent
          * negative numbers (and additionally r9g9b9e5 can't represent
          * very large numbers). d3d10 seems happy without clamping in
          * this case, but gl spec is pretty clear: "for floating
          * point and integer formats, border values are clamped to
          * the representable range of the format" so do that here.
          */
         case PIPE_FORMAT_R11G11B10_FLOAT:
            min_clamp = vec4_bld.zero;
            break;
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
            min_clamp = vec4_bld.zero;
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
            break;
         default:
            assert(0);
            break;
         }
      }
   }

   /* apply whichever clamps were determined above (NULL means "no clamp") */
   if (min_clamp) {
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
   }
   if (max_clamp) {
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
   }

   bld->border_color_clamped = border_color;
}
3049
3050
3051 /**
3052 * General texture sampling codegen.
3053 * This function handles texture sampling for all texture targets (1D,
3054 * 2D, 3D, cube) and all filtering modes.
3055 */
3056 static void
lp_build_sample_general(struct lp_build_sample_context * bld,unsigned sampler_unit,bool is_gather,const LLVMValueRef * coords,const LLVMValueRef * offsets,LLVMValueRef lod_positive,LLVMValueRef lod_fpart,LLVMValueRef ilevel0,LLVMValueRef ilevel1,LLVMValueRef * colors_out)3057 lp_build_sample_general(struct lp_build_sample_context *bld,
3058 unsigned sampler_unit,
3059 bool is_gather,
3060 const LLVMValueRef *coords,
3061 const LLVMValueRef *offsets,
3062 LLVMValueRef lod_positive,
3063 LLVMValueRef lod_fpart,
3064 LLVMValueRef ilevel0,
3065 LLVMValueRef ilevel1,
3066 LLVMValueRef *colors_out)
3067 {
3068 LLVMBuilderRef builder = bld->gallivm->builder;
3069 const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
3070 const unsigned mip_filter = sampler_state->min_mip_filter;
3071 const unsigned min_filter = sampler_state->min_img_filter;
3072 const unsigned mag_filter = sampler_state->mag_img_filter;
3073 LLVMValueRef texels[4];
3074 unsigned chan;
3075
3076 /* if we need border color, (potentially) clamp it now */
3077 if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
3078 min_filter,
3079 mag_filter) ||
3080 (bld->dims > 1 &&
3081 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
3082 min_filter,
3083 mag_filter)) ||
3084 (bld->dims > 2 &&
3085 lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
3086 min_filter,
3087 mag_filter))) {
3088 lp_build_clamp_border_color(bld, sampler_unit);
3089 }
3090
3091
3092 /*
3093 * Get/interpolate texture colors.
3094 */
3095
3096 for (chan = 0; chan < 4; ++chan) {
3097 texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
3098 lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
3099 }
3100
3101 if (sampler_state->aniso) {
3102 lp_build_sample_aniso(bld, PIPE_TEX_FILTER_NEAREST, mip_filter,
3103 false, coords, offsets, ilevel0,
3104 ilevel1, lod_fpart, texels);
3105 } else if (min_filter == mag_filter) {
3106 /* no need to distinguish between minification and magnification */
3107 lp_build_sample_mipmap(bld, min_filter, mip_filter,
3108 is_gather,
3109 coords, offsets,
3110 ilevel0, ilevel1, lod_fpart,
3111 texels);
3112 } else {
3113 /*
3114 * Could also get rid of the if-logic and always use mipmap_both, both
3115 * for the single lod and multi-lod case if nothing really uses this.
3116 */
3117 if (bld->num_lods == 1) {
3118 /* Emit conditional to choose min image filter or mag image filter
3119 * depending on the lod being > 0 or <= 0, respectively.
3120 */
3121 struct lp_build_if_state if_ctx;
3122
3123 lod_positive = LLVMBuildTrunc(builder, lod_positive,
3124 LLVMInt1TypeInContext(bld->gallivm->context),
3125 "lod_pos");
3126
3127 lp_build_if(&if_ctx, bld->gallivm, lod_positive);
3128 {
3129 /* Use the minification filter */
3130 lp_build_sample_mipmap(bld, min_filter, mip_filter, false,
3131 coords, offsets,
3132 ilevel0, ilevel1, lod_fpart,
3133 texels);
3134 }
3135 lp_build_else(&if_ctx);
3136 {
3137 /* Use the magnification filter */
3138 lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
3139 false,
3140 coords, offsets,
3141 ilevel0, NULL, NULL,
3142 texels);
3143 }
3144 lp_build_endif(&if_ctx);
3145 } else {
3146 LLVMValueRef need_linear, linear_mask;
3147 unsigned mip_filter_for_nearest;
3148 struct lp_build_if_state if_ctx;
3149
3150 if (min_filter == PIPE_TEX_FILTER_LINEAR) {
3151 linear_mask = lod_positive;
3152 mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
3153 } else {
3154 linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
3155 mip_filter_for_nearest = mip_filter;
3156 }
3157 need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
3158 linear_mask);
3159 lp_build_name(need_linear, "need_linear");
3160
3161 if (bld->num_lods != bld->coord_type.length) {
3162 linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
3163 bld->lodi_type,
3164 bld->int_coord_type,
3165 linear_mask);
3166 }
3167
3168 lp_build_if(&if_ctx, bld->gallivm, need_linear);
3169 {
3170 /*
3171 * Do sampling with both filters simultaneously. This means using
3172 * a linear filter and doing some tricks (with weights) for the
3173 * pixels which need nearest filter.
3174 * Note that it's probably rare some pixels need nearest and some
3175 * linear filter but the fixups required for the nearest pixels
3176 * aren't all that complicated so just always run a combined path
3177 * if at least some pixels require linear.
3178 */
3179 lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
3180 coords, offsets,
3181 ilevel0, ilevel1,
3182 lod_fpart, lod_positive,
3183 texels);
3184 }
3185 lp_build_else(&if_ctx);
3186 {
3187 /*
3188 * All pixels require just nearest filtering, which is way
3189 * cheaper than linear, hence do a separate path for that.
3190 */
3191 lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST,
3192 mip_filter_for_nearest, false,
3193 coords, offsets,
3194 ilevel0, ilevel1, lod_fpart,
3195 texels);
3196 }
3197 lp_build_endif(&if_ctx);
3198 }
3199 }
3200
3201 for (chan = 0; chan < 4; ++chan) {
3202 colors_out[chan] = LLVMBuildLoad2(builder, bld->texel_bld.vec_type, texels[chan], "");
3203 lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
3204 }
3205 }
3206
3207
3208 /**
3209 * Texel fetch function. In contrast to general sampling there is no
3210 * filtering, no coord minification, lod (if any) is always explicit uint,
3211 * coords are uints (in terms of texel units) directly to be applied to the
3212 * selected mip level (after adding texel offsets). This function handles
3213 * texel fetch for all targets where texel fetch is supported (no cube maps,
3214 * but 1d, 2d, 3d are supported, arrays and buffers should be too).
3215 */
3216 static void
lp_build_fetch_texel(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef ms_index,const LLVMValueRef * coords,LLVMValueRef explicit_lod,const LLVMValueRef * offsets,LLVMValueRef * colors_out)3217 lp_build_fetch_texel(struct lp_build_sample_context *bld,
3218 unsigned texture_unit,
3219 LLVMValueRef ms_index,
3220 const LLVMValueRef *coords,
3221 LLVMValueRef explicit_lod,
3222 const LLVMValueRef *offsets,
3223 LLVMValueRef *colors_out)
3224 {
3225 struct lp_build_context *perquadi_bld = &bld->lodi_bld;
3226 struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
3227 unsigned dims = bld->dims, chan;
3228 unsigned target = bld->static_texture_state->target;
3229 bool out_of_bound_ret_zero = true;
3230 LLVMValueRef size, ilevel;
3231 LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
3232 LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
3233 LLVMValueRef width, height, depth, i, j;
3234 LLVMValueRef offset, out_of_bounds, out1;
3235
3236 LLVMValueRef first_level;
3237
3238 first_level = get_first_level(bld->gallivm,
3239 bld->resources_type,
3240 bld->resources_ptr,
3241 texture_unit, NULL,
3242 bld->static_texture_state,
3243 bld->dynamic_state);
3244 out_of_bounds = int_coord_bld->zero;
3245
3246 if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
3247 if (bld->num_mips != int_coord_bld->type.length) {
3248 ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
3249 perquadi_bld->type, explicit_lod, 0);
3250 } else {
3251 ilevel = explicit_lod;
3252 }
3253
3254 LLVMValueRef last_level;
3255
3256 last_level = get_last_level(bld->gallivm,
3257 bld->resources_type,
3258 bld->resources_ptr,
3259 texture_unit, NULL,
3260 bld->static_texture_state,
3261 bld->dynamic_state);
3262
3263 first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
3264 last_level = lp_build_broadcast_scalar(&bld->leveli_bld, last_level);
3265 lp_build_nearest_mip_level(bld,
3266 first_level, last_level,
3267 ilevel, &ilevel,
3268 out_of_bound_ret_zero ? &out_of_bounds : NULL);
3269 } else {
3270 assert(bld->num_mips == 1);
3271 if (bld->static_texture_state->target != PIPE_BUFFER) {
3272 ilevel = first_level;
3273 } else {
3274 ilevel = lp_build_const_int32(bld->gallivm, 0);
3275 }
3276 }
3277 lp_build_mipmap_level_sizes(bld, ilevel,
3278 &size,
3279 &row_stride_vec, &img_stride_vec);
3280 lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
3281 size, &width, &height, &depth);
3282
3283 if (target == PIPE_TEXTURE_1D_ARRAY ||
3284 target == PIPE_TEXTURE_2D_ARRAY) {
3285 if (out_of_bound_ret_zero) {
3286 z = lp_build_layer_coord(bld, texture_unit, false, z, &out1);
3287 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3288 } else {
3289 z = lp_build_layer_coord(bld, texture_unit, false, z, NULL);
3290 }
3291 }
3292
3293 /* This is a lot like border sampling */
3294 if (offsets[0]) {
3295 /*
3296 * coords are really unsigned, offsets are signed, but I don't think
3297 * exceeding 31 bits is possible
3298 */
3299 x = lp_build_add(int_coord_bld, x, offsets[0]);
3300 }
3301 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
3302 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3303 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
3304 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3305
3306 if (dims >= 2) {
3307 if (offsets[1]) {
3308 y = lp_build_add(int_coord_bld, y, offsets[1]);
3309 }
3310 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
3311 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3312 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
3313 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3314
3315 if (dims >= 3) {
3316 if (offsets[2]) {
3317 z = lp_build_add(int_coord_bld, z, offsets[2]);
3318 }
3319 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
3320 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3321 out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
3322 out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
3323 }
3324 }
3325
3326 if (bld->static_texture_state->tiled) {
3327 lp_build_tiled_sample_offset(&bld->int_coord_bld,
3328 bld->format_desc->format,
3329 bld->static_texture_state,
3330 x, y, z, width, height, img_stride_vec,
3331 &offset, &i, &j);
3332 } else {
3333 lp_build_sample_offset(int_coord_bld,
3334 bld->format_desc,
3335 x, y, z, row_stride_vec, img_stride_vec,
3336 &offset, &i, &j);
3337 }
3338
3339 if (bld->static_texture_state->target != PIPE_BUFFER) {
3340 offset = lp_build_add(int_coord_bld, offset,
3341 lp_build_get_mip_offsets(bld, ilevel));
3342 }
3343
3344 if (bld->fetch_ms && bld->static_texture_state->level_zero_only) {
3345 LLVMValueRef num_samples = bld->dynamic_state->last_level(bld->gallivm,
3346 bld->resources_type,
3347 bld->resources_ptr,
3348 texture_unit, NULL);
3349 num_samples = LLVMBuildZExt(bld->gallivm->builder, num_samples,
3350 bld->int_bld.elem_type, "");
3351 LLVMValueRef sample_stride = lp_sample_load_mip_value(bld->gallivm,
3352 bld->mip_offsets_type,
3353 bld->mip_offsets,
3354 lp_build_const_int32(bld->gallivm, LP_JIT_TEXTURE_SAMPLE_STRIDE));
3355 lp_build_sample_ms_offset(int_coord_bld, ms_index, num_samples, sample_stride,
3356 &offset, &out_of_bounds);
3357 }
3358
3359 if (bld->residency) {
3360 lp_build_gather_resident(&bld->float_vec_bld, bld->dynamic_state,
3361 bld->resources_type, bld->resources_ptr,
3362 offset, &bld->resident);
3363 }
3364
3365 offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
3366
3367 lp_build_fetch_rgba_soa(bld->gallivm,
3368 bld->format_desc,
3369 bld->texel_type, true,
3370 bld->base_ptr, offset,
3371 i, j,
3372 bld->cache,
3373 colors_out);
3374
3375 if (out_of_bound_ret_zero) {
3376 /*
3377 * Only needed for ARB_robust_buffer_access_behavior and d3d10.
3378 * Could use min/max above instead of out-of-bounds comparisons
3379 * if we don't care about the result returned for out-of-bounds.
3380 */
3381 LLVMValueRef oob[4] = {
3382 bld->texel_bld.zero,
3383 bld->texel_bld.zero,
3384 bld->texel_bld.zero,
3385 bld->texel_bld.zero,
3386 };
3387 lp_build_format_swizzle_soa(bld->format_desc, &bld->texel_bld, oob, oob);
3388 for (chan = 0; chan < 4; chan++) {
3389 colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
3390 oob[chan], colors_out[chan]);
3391 }
3392 }
3393 }
3394
3395
3396 /**
3397 * Just set texels to white instead of actually sampling the texture.
3398 * For debugging.
3399 */
3400 void
lp_build_sample_nop(struct gallivm_state * gallivm,struct lp_type type,const LLVMValueRef * coords,LLVMValueRef texel_out[4])3401 lp_build_sample_nop(struct gallivm_state *gallivm,
3402 struct lp_type type,
3403 const LLVMValueRef *coords,
3404 LLVMValueRef texel_out[4])
3405 {
3406 LLVMValueRef one = lp_build_one(gallivm, type);
3407 for (unsigned chan = 0; chan < 4; chan++) {
3408 texel_out[chan] = one;
3409 }
3410 }
3411
3412
3413 struct lp_type
lp_build_texel_type(struct lp_type texel_type,const struct util_format_description * format_desc)3414 lp_build_texel_type(struct lp_type texel_type,
3415 const struct util_format_description *format_desc)
3416 {
3417 /* always using the first channel hopefully should be safe,
3418 * if not things WILL break in other places anyway.
3419 */
3420 if (format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
3421 format_desc->channel[0].pure_integer) {
3422 if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
3423 texel_type = lp_type_int_vec(texel_type.width, texel_type.width * texel_type.length);
3424 } else if (format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
3425 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3426 }
3427 } else if (util_format_has_stencil(format_desc) &&
3428 !util_format_has_depth(format_desc)) {
3429 /* for stencil only formats, sample stencil (uint) */
3430 texel_type = lp_type_uint_vec(texel_type.width, texel_type.width * texel_type.length);
3431 }
3432 return texel_type;
3433 }
3434
3435
3436 /**
3437 * Build the actual texture sampling code.
3438 * 'texel' will return a vector of four LLVMValueRefs corresponding to
3439 * R, G, B, A.
3440 * \param type vector float type to use for coords, etc.
3441 * \param sample_key
3442 * \param derivs partial derivatives of (s,t,r,q) with respect to x and y
3443 */
void
lp_build_sample_soa_code(struct gallivm_state *gallivm,
                         const struct lp_static_texture_state *static_texture_state,
                         const struct lp_static_sampler_state *static_sampler_state,
                         struct lp_sampler_dynamic_state *dynamic_state,
                         struct lp_type type,
                         unsigned sample_key,
                         unsigned texture_index,
                         unsigned sampler_index,
                         LLVMTypeRef resources_type,
                         LLVMValueRef resources_ptr,
                         LLVMTypeRef thread_data_type,
                         LLVMValueRef thread_data_ptr,
                         const LLVMValueRef *coords,
                         const LLVMValueRef *offsets,
                         const struct lp_derivatives *derivs, /* optional */
                         LLVMValueRef lod, /* optional */
                         LLVMValueRef ms_index, /* optional */
                         LLVMValueRef aniso_filter_table,
                         LLVMValueRef *texel_out)
{
   assert(static_texture_state);
   assert(static_texture_state->format < PIPE_FORMAT_COUNT);
   assert(static_sampler_state);

   const enum pipe_texture_target target = static_texture_state->target;
   const unsigned dims = texture_dims(target);
   /* type.length == 1 is the scalar path; otherwise coords come 4-wide per quad */
   const unsigned num_quads = type.length == 1 ? 1 : type.length / 4;
   struct lp_build_sample_context bld;
   /* local copy so filter/wrap state can be overridden below without
    * touching the caller's sampler state */
   struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
   LLVMBuilderRef builder = gallivm->builder;
   const struct util_format_description *res_format_desc;

   if (0) {
      /* flip to 1 for debugging which formats get sampled */
      enum pipe_format fmt = static_texture_state->format;
      debug_printf("Sample from %s\n", util_format_name(fmt));
   }

   /* Decode the packed sample_key into its individual fields. */
   const enum lp_sampler_lod_property lod_property =
      (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
      LP_SAMPLER_LOD_PROPERTY_SHIFT;
   const enum lp_sampler_lod_control lod_control =
      (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
      LP_SAMPLER_LOD_CONTROL_SHIFT;
   const enum lp_sampler_op_type op_type =
      (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
      LP_SAMPLER_OP_TYPE_SHIFT;

   const bool fetch_ms = !!(sample_key & LP_SAMPLER_FETCH_MS);
   const bool op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
   const bool op_is_lodq = op_type == LP_SAMPLER_OP_LODQ;
   const bool op_is_gather = op_type == LP_SAMPLER_OP_GATHER;

   /* The single 'lod' argument means bias OR explicit lod depending on
    * lod_control; derivatives are mutually exclusive with both. */
   LLVMValueRef lod_bias = NULL;
   LLVMValueRef explicit_lod = NULL;
   if (lod_control == LP_SAMPLER_LOD_BIAS) {
      lod_bias = lod;
      assert(lod);
      assert(derivs == NULL);
   } else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
      explicit_lod = lod;
      assert(lod);
      assert(derivs == NULL);
   } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
      assert(derivs);
      assert(lod == NULL);
   } else {
      assert(derivs == NULL);
      assert(lod == NULL);
   }

   if (static_texture_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      LLVMValueRef zero = lp_build_zero(gallivm, type);
      for (unsigned chan = 0; chan < 4; chan++) {
         texel_out[chan] = zero;
      }
      return;
   }

   assert(type.floating);

   /* Setup our build context */
   memset(&bld, 0, sizeof bld);
   bld.gallivm = gallivm;
   bld.resources_type = resources_type;
   bld.resources_ptr = resources_ptr;
   bld.aniso_filter_table = aniso_filter_table;
   bld.static_sampler_state = &derived_sampler_state;
   bld.static_texture_state = static_texture_state;
   bld.dynamic_state = dynamic_state;
   bld.format_desc = util_format_description(static_texture_state->format);
   bld.dims = dims;

   res_format_desc = util_format_description(static_texture_state->res_format);

   /* lodq must not take the quad-lod / rho-approx / brilinear shortcuts:
    * it has to report accurate per-pixel lod values. */
   if (gallivm_perf & GALLIVM_PERF_NO_QUAD_LOD || op_is_lodq) {
      bld.no_quad_lod = true;
   }
   if (!(gallivm_perf & GALLIVM_PERF_RHO_APPROX) || op_is_lodq) {
      bld.no_rho_approx = true;
   }
   if (!(gallivm_perf & GALLIVM_PERF_BRILINEAR) || op_is_lodq || lod_bias || explicit_lod) {
      bld.no_brilinear = true;
   }

   bld.vector_width = lp_type_width(type);

   bld.float_type = lp_type_float(32);
   bld.int_type = lp_type_int(32);
   bld.coord_type = type;
   bld.int_coord_type = lp_int_type(type);
   /* size vectors hold w/h/d in one vec4 for multi-dim targets, scalar for 1D */
   bld.float_size_in_type = lp_type_float(32);
   bld.float_size_in_type.length = dims > 1 ? 4 : 1;
   bld.int_size_in_type = lp_int_type(bld.float_size_in_type);

   bld.texel_type = lp_build_texel_type(type, bld.format_desc);

   if (!static_texture_state->level_zero_only ||
       !static_sampler_state->max_lod_pos || op_is_lodq) {
      derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
   } else {
      /* single-level texture with non-positive max_lod: mipmapping is moot */
      derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
   }

   if (op_is_gather) {
      /*
       * gather4 is exactly like GL_LINEAR filtering but in the end skipping
       * the actual filtering. Using mostly the same paths, so cube face
       * selection, coord wrapping etc. all naturally uses the same code.
       */
      derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
      derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
      derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
   }

   const enum pipe_tex_mipfilter mip_filter =
      derived_sampler_state.min_mip_filter;

   if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
       static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
      /*
       * Seamless filtering ignores wrap modes.
       * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
       * bilinear it's not correct but way better than using for instance
       * repeat. Note we even set this for non-seamless. Technically GL
       * allows any wrap mode, which made sense when supporting true borders
       * (can get seamless effect with border and CLAMP_TO_BORDER), but
       * gallium doesn't support borders and d3d9 requires wrap modes to be
       * ignored and it's a pain to fix up the sampler state (as it makes it
       * texture dependent).
       */
      derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
      derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
   }

   /*
    * We could force CLAMP to CLAMP_TO_EDGE here if min/mag filter is nearest,
    * so AoS path could be used. Not sure it's worth the trouble...
    */
   const enum pipe_tex_filter min_img_filter =
      derived_sampler_state.min_img_filter;
   const enum pipe_tex_filter mag_img_filter =
      derived_sampler_state.mag_img_filter;

   /*
    * This is all a bit complicated different paths are chosen for performance
    * reasons.
    * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
    * everything (the last two options are equivalent for 4-wide case).
    * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
    * lod is calculated then the lod value extracted afterwards so making this
    * case basically the same as far as lod handling is concerned for the
    * further sample/filter code as the 1 lod for everything case.
    * Different lod handling mostly shows up when building mipmap sizes
    * (lp_build_mipmap_level_sizes() and friends) and also in filtering
    * (getting the fractional part of the lod to the right texels).
    */

   /*
    * There are other situations where at least the multiple int lods could be
    * avoided like min and max lod being equal.
    */
   bld.num_mips = bld.num_lods = 1;

   if ((mip_filter != PIPE_TEX_MIPFILTER_NONE && op_is_tex &&
        (static_texture_state->target == PIPE_TEXTURE_CUBE ||
         static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)) ||
       op_is_lodq) {
      /*
       * special case for using per-pixel lod even for implicit lod,
       * which is generally never required (ok by APIs) except to please
       * some (somewhat broken imho) tests (because per-pixel face selection
       * can cause derivatives to be different for pixels outside the primitive
       * due to the major axis division even if pre-project derivatives are
       * looking normal).
       * For lodq, we do it to simply avoid scalar pack / unpack (albeit for
       * cube maps we do indeed get per-pixel lod values).
       */
      bld.num_mips = type.length;
      bld.num_lods = type.length;
   } else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
       (explicit_lod || lod_bias || derivs)) {
      if ((!op_is_tex && target != PIPE_BUFFER) ||
          (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
         bld.num_mips = type.length;
         bld.num_lods = type.length;
      } else if (op_is_tex && min_img_filter != mag_img_filter) {
         /* still need per-element lod sign to switch min/mag filter */
         bld.num_mips = 1;
         bld.num_lods = type.length;
      }
   }
   /* TODO: for true scalar_lod should only use 1 lod value */
   else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
            (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
      bld.num_mips = num_quads;
      bld.num_lods = num_quads;
   } else if (op_is_tex && min_img_filter != mag_img_filter) {
      bld.num_mips = 1;
      bld.num_lods = num_quads;
   }

   bld.fetch_ms = fetch_ms;
   bld.residency = !!(sample_key & LP_SAMPLER_RESIDENCY);
   if (op_is_gather)
      bld.gather_comp = (sample_key & LP_SAMPLER_GATHER_COMP_MASK) >> LP_SAMPLER_GATHER_COMP_SHIFT;
   bld.lodf_type = type;
   /* we want native vector size to be able to use our intrinsics */
   if (bld.num_lods != type.length) {
      /* TODO: this currently always has to be per-quad or per-element */
      bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
   }
   bld.lodi_type = lp_int_type(bld.lodf_type);
   bld.levelf_type = bld.lodf_type;
   if (bld.num_mips == 1) {
      bld.levelf_type.length = 1;
   }
   bld.leveli_type = lp_int_type(bld.levelf_type);
   bld.float_size_type = bld.float_size_in_type;

   /* Note: size vectors may not be native. They contain minified w/h/d/_
    * values, with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to
    * 8x4f32
    */
   if (bld.num_mips > 1) {
      bld.float_size_type.length = bld.num_mips == type.length ?
                                      bld.num_mips * bld.float_size_in_type.length :
                                      type.length;
   }
   bld.int_size_type = lp_int_type(bld.float_size_type);

   /* One build context per distinct vector type used below. */
   lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
   lp_build_context_init(&bld.float_vec_bld, gallivm, type);
   lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
   lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
   lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
   lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
   lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
   lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
   lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
   lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
   lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
   lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
   lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
   lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);

   /* Get the dynamic state */
   LLVMValueRef tex_width = dynamic_state->width(gallivm, resources_type,
                                                 resources_ptr, texture_index,
                                                 NULL);
   bld.row_stride_array = dynamic_state->row_stride(gallivm, resources_type,
                                                    resources_ptr, texture_index, NULL,
                                                    &bld.row_stride_type);
   bld.img_stride_array = dynamic_state->img_stride(gallivm, resources_type,
                                                    resources_ptr, texture_index, NULL,
                                                    &bld.img_stride_type);
   bld.base_ptr = dynamic_state->base_ptr(gallivm, resources_type,
                                          resources_ptr, texture_index, NULL);
   bld.mip_offsets = dynamic_state->mip_offsets(gallivm, resources_type,
                                                resources_ptr, texture_index, NULL,
                                                &bld.mip_offsets_type);

   /* Note that mip_offsets is an array[level] of offsets to texture images */

   if (dynamic_state->cache_ptr && thread_data_ptr) {
      bld.cache = dynamic_state->cache_ptr(gallivm, thread_data_type,
                                           thread_data_ptr, texture_index);
   }

   /* Block sizes of the resource format vs. the view format; they differ
    * e.g. when a compressed resource is viewed through an uncompressed
    * format. */
   uint32_t res_bw = res_format_desc->block.width;
   uint32_t res_bh = res_format_desc->block.height;
   uint32_t bw = bld.format_desc->block.width;
   uint32_t bh = bld.format_desc->block.height;

   /* only scale if the blocksizes are different. */
   if (res_bw == bw)
      res_bw = bw = 1;
   if (res_bh == bh)
      res_bh = bh = 1;

   /* width, height, depth as single int vector */
   if (dims <= 1) {
      bld.int_size = tex_width;
      bld.int_tex_blocksize = LLVMConstInt(i32t, res_bw, 0);
      bld.int_tex_blocksize_log2 = LLVMConstInt(i32t, util_logbase2(res_bw), 0);
      bld.int_view_blocksize = LLVMConstInt(i32t, bw, 0);
   } else {
      /* lane 0 = width, lane 1 = height, lane 2 = depth */
      bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                            tex_width,
                                            LLVMConstInt(i32t, 0, 0), "");
      bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                                     LLVMConstInt(i32t, res_bw, 0),
                                                     LLVMConstInt(i32t, 0, 0), "");
      bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                                          LLVMConstInt(i32t, util_logbase2(res_bw), 0),
                                                          LLVMConstInt(i32t, 0, 0), "");
      bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
                                                      LLVMConstInt(i32t, bw, 0),
                                                      LLVMConstInt(i32t, 0, 0), "");
      if (dims >= 2) {
         LLVMValueRef tex_height =
            dynamic_state->height(gallivm, resources_type,
                                  resources_ptr, texture_index, NULL);
         tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
                                    bld.int_bld.elem_type, "");
         bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
                                               tex_height,
                                               LLVMConstInt(i32t, 1, 0), "");
         bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
                                                        LLVMConstInt(i32t, res_bh, 0),
                                                        LLVMConstInt(i32t, 1, 0), "");
         bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
                                                             LLVMConstInt(i32t, util_logbase2(res_bh), 0),
                                                             LLVMConstInt(i32t, 1, 0), "");
         bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
                                                         LLVMConstInt(i32t, bh, 0),
                                                         LLVMConstInt(i32t, 1, 0), "");
         if (dims >= 3) {
            LLVMValueRef tex_depth =
               dynamic_state->depth(gallivm, resources_type, resources_ptr,
                                    texture_index, NULL);
            tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
                                      bld.int_bld.elem_type, "");
            bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
                                                  tex_depth,
                                                  LLVMConstInt(i32t, 2, 0), "");
            /* depth is never block-compressed, so blocksize is 1 (log2 0) */
            bld.int_tex_blocksize = LLVMBuildInsertElement(builder, bld.int_tex_blocksize,
                                                           LLVMConstInt(i32t, 1, 0),
                                                           LLVMConstInt(i32t, 2, 0), "");
            bld.int_tex_blocksize_log2 = LLVMBuildInsertElement(builder, bld.int_tex_blocksize_log2,
                                                                LLVMConstInt(i32t, 0, 0),
                                                                LLVMConstInt(i32t, 2, 0), "");
            bld.int_view_blocksize = LLVMBuildInsertElement(builder, bld.int_view_blocksize,
                                                            LLVMConstInt(i32t, 1, 0),
                                                            LLVMConstInt(i32t, 2, 0), "");
         }
      }
   }

   /* Mutable copy of the coords; cube-array handling below may rewrite it. */
   LLVMValueRef newcoords[5];
   for (unsigned i = 0; i < 5; i++) {
      newcoords[i] = coords[i];
   }

   if (util_format_is_pure_integer(static_texture_state->format) &&
       !util_format_has_depth(bld.format_desc) && op_is_tex &&
       (static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_LINEAR ||
        static_sampler_state->min_img_filter == PIPE_TEX_FILTER_LINEAR ||
        static_sampler_state->mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
      /*
       * Bail if impossible filtering is specified (the awkward additional
       * depth check is because it is legal in gallium to have things like
       * S8Z24 here which would say it's pure int despite such formats should
       * sample the depth component).
       * In GL such filters make the texture incomplete, this makes it robust
       * against gallium frontends which set this up regardless (we'd crash in
       * the lerp later otherwise).
       * At least in some apis it may be legal to use such filters with lod
       * queries and/or gather (at least for gather d3d10 says only the wrap
       * bits are really used hence filter bits are likely simply ignored).
       * For fetch, we don't get valid samplers either way here.
       */
      LLVMValueRef zero = lp_build_zero(gallivm, type);
      for (unsigned chan = 0; chan < 4; chan++) {
         texel_out[chan] = zero;
      }
      return;
   }

   if (0) {
      /* For debug: no-op texture sampling */
      lp_build_sample_nop(gallivm,
                          bld.texel_type,
                          newcoords,
                          texel_out);
   } else if (op_type == LP_SAMPLER_OP_FETCH) {
      /* texelFetch: no filtering, direct texel addressing */
      lp_build_fetch_texel(&bld, texture_index, ms_index, newcoords,
                           lod, offsets, texel_out);
      if (bld.residency)
         texel_out[4] = bld.resident;
   } else {
      LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
      LLVMValueRef ilevel0 = NULL, ilevel1 = NULL, lod = NULL;
      /* AoS (fixed-point 8unorm) path is only usable for plain texturing
       * with simple wrap modes, no compare/aniso, no tiling. */
      bool use_aos = util_format_fits_8unorm(bld.format_desc) &&
                     op_is_tex &&
                     /* not sure this is strictly needed or simply impossible */
                     derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
                     derived_sampler_state.aniso == 0 &&
                     lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);

      use_aos &= bld.num_lods <= num_quads ||
                 derived_sampler_state.min_img_filter ==
                    derived_sampler_state.mag_img_filter;

      use_aos &= !static_texture_state->tiled;

      if (gallivm_perf & GALLIVM_PERF_NO_AOS_SAMPLING) {
         use_aos = 0;
      }

      if (dims > 1) {
         use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
         if (dims > 2) {
            use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
         }
      }
      if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
           static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
          derived_sampler_state.seamless_cube_map &&
          (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
           derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
         /* theoretically possible with AoS filtering but not implemented (complex!) */
         use_aos = 0;
      }

      if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
          !use_aos && util_format_fits_8unorm(bld.format_desc)) {
         debug_printf("%s: using floating point linear filtering for %s\n",
                      __func__, bld.format_desc->short_name);
         debug_printf("  min_img %d  mag_img %d  mip %d  target %d  seamless %d"
                      "  wraps %d  wrapt %d  wrapr %d\n",
                      derived_sampler_state.min_img_filter,
                      derived_sampler_state.mag_img_filter,
                      derived_sampler_state.min_mip_filter,
                      static_texture_state->target,
                      derived_sampler_state.seamless_cube_map,
                      derived_sampler_state.wrap_s,
                      derived_sampler_state.wrap_t,
                      derived_sampler_state.wrap_r);
      }

      /* Compute lod, mip level pair (ilevel0/ilevel1) and the lerp weight
       * between them; also handles cube face selection via newcoords. */
      lp_build_sample_common(&bld, op_is_lodq, texture_index, sampler_index,
                             newcoords, derivs, lod_bias, explicit_lod,
                             &lod_positive, &lod, &lod_fpart,
                             &ilevel0, &ilevel1);

      if (op_is_lodq) {
         /* lod query: return (fractional, clamped-int) lod, no sampling */
         texel_out[0] = lod_fpart;
         texel_out[1] = lod;
         texel_out[2] = texel_out[3] = bld.coord_bld.zero;
         if (bld.residency)
            texel_out[4] = bld.resident;
         return;
      }

      if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
         /* The aos path doesn't do seamless filtering so simply add cube layer
          * to face now.
          */
         newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
      }

      /*
       * we only try 8-wide sampling with soa or if we have AVX2
       * as it appears to be a loss with just AVX
       */
      if (num_quads == 1 || !use_aos ||
          (util_get_cpu_caps()->has_avx2 &&
           (bld.num_lods == 1 ||
            derived_sampler_state.min_img_filter == derived_sampler_state.mag_img_filter))) {
         if (use_aos) {
            /* do sampling/filtering with fixed pt arithmetic */
            lp_build_sample_aos(&bld,
                                newcoords[0], newcoords[1],
                                newcoords[2],
                                offsets, lod_positive, lod_fpart,
                                ilevel0, ilevel1,
                                texel_out);
         } else {
            lp_build_sample_general(&bld, sampler_index,
                                    op_type == LP_SAMPLER_OP_GATHER,
                                    newcoords, offsets,
                                    lod_positive, lod_fpart,
                                    ilevel0, ilevel1,
                                    texel_out);
            if (bld.residency)
               texel_out[4] = bld.resident;
         }
      } else {
         /*
          * Wide (8+) vectors with AoS but without AVX2: split into 4-wide
          * chunks, sample each with a narrower clone of the build context,
          * then concatenate the per-quad results.
          */
         struct lp_build_sample_context bld4;
         struct lp_type type4 = type;
         LLVMValueRef texelout4[4];
         LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];

         type4.length = 4;

         /* Setup our build context */
         memset(&bld4, 0, sizeof bld4);
         bld4.no_quad_lod = bld.no_quad_lod;
         bld4.no_rho_approx = bld.no_rho_approx;
         bld4.no_brilinear = bld.no_brilinear;
         bld4.gallivm = bld.gallivm;
         bld4.resources_type = bld.resources_type;
         bld4.resources_ptr = bld.resources_ptr;
         bld4.aniso_filter_table = aniso_filter_table;
         bld4.static_texture_state = bld.static_texture_state;
         bld4.static_sampler_state = bld.static_sampler_state;
         bld4.dynamic_state = bld.dynamic_state;
         bld4.format_desc = bld.format_desc;
         bld4.dims = bld.dims;
         bld4.row_stride_type = bld.row_stride_type;
         bld4.row_stride_array = bld.row_stride_array;
         bld4.img_stride_type = bld.img_stride_type;
         bld4.img_stride_array = bld.img_stride_array;
         bld4.base_ptr = bld.base_ptr;
         bld4.mip_offsets_type = bld.mip_offsets_type;
         bld4.mip_offsets = bld.mip_offsets;
         bld4.int_size = bld.int_size;
         bld4.int_tex_blocksize = bld.int_tex_blocksize;
         bld4.int_tex_blocksize_log2 = bld.int_tex_blocksize_log2;
         bld4.int_view_blocksize = bld.int_view_blocksize;
         bld4.cache = bld.cache;

         bld4.vector_width = lp_type_width(type4);

         bld4.float_type = lp_type_float(32);
         bld4.int_type = lp_type_int(32);
         bld4.coord_type = type4;
         bld4.int_coord_type = lp_int_type(type4);
         bld4.float_size_in_type = lp_type_float(32);
         bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
         bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
         bld4.texel_type = bld.texel_type;
         bld4.texel_type.length = 4;

         /* Re-derive num_mips/num_lods for the 4-wide type (mirrors the
          * logic used for the full-width context above). */
         bld4.num_mips = bld4.num_lods = 1;
         if (bld4.no_quad_lod && bld4.no_rho_approx &&
             (static_texture_state->target == PIPE_TEXTURE_CUBE ||
              static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
             (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
            bld4.num_mips = type4.length;
            bld4.num_lods = type4.length;
         }
         if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
             (explicit_lod || lod_bias || derivs)) {
            if ((!op_is_tex && target != PIPE_BUFFER) ||
                (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
               bld4.num_mips = type4.length;
               bld4.num_lods = type4.length;
            } else if (op_is_tex && min_img_filter != mag_img_filter) {
               bld4.num_mips = 1;
               bld4.num_lods = type4.length;
            }
         }

         /* we want native vector size to be able to use our intrinsics */
         bld4.lodf_type = type4;
         if (bld4.num_lods != type4.length) {
            bld4.lodf_type.length = 1;
         }
         bld4.lodi_type = lp_int_type(bld4.lodf_type);
         bld4.levelf_type = type4;
         if (bld4.num_mips != type4.length) {
            bld4.levelf_type.length = 1;
         }
         bld4.leveli_type = lp_int_type(bld4.levelf_type);
         bld4.float_size_type = bld4.float_size_in_type;
         if (bld4.num_mips > 1) {
            bld4.float_size_type.length = bld4.num_mips == type4.length ?
                                             bld4.num_mips * bld4.float_size_in_type.length :
                                             type4.length;
         }
         bld4.int_size_type = lp_int_type(bld4.float_size_type);

         lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
         lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
         lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
         lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
         lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
         lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
         lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
         lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
         lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
         lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
         lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
         lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
         lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
         lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);

         for (unsigned i = 0; i < num_quads; i++) {
            LLVMValueRef s4, t4, r4;
            LLVMValueRef lod_positive4, lod_fpart4 = NULL;
            LLVMValueRef ilevel04, ilevel14 = NULL;
            LLVMValueRef offsets4[4] = { NULL };
            unsigned num_lods = bld4.num_lods;

            /* Slice out this quad's 4 lanes of coords/offsets/lod data. */
            s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
            t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
            r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);

            if (offsets[0]) {
               offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
               if (dims > 1) {
                  offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
                  if (dims > 2) {
                     offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
                  }
               }
            }
            lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
            ilevel04 = bld.num_mips == 1 ? ilevel0 :
                          lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
               ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
               lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
            }

            if (use_aos) {
               /* do sampling/filtering with fixed pt arithmetic */
               lp_build_sample_aos(&bld4,
                                   s4, t4, r4, offsets4,
                                   lod_positive4, lod_fpart4,
                                   ilevel04, ilevel14,
                                   texelout4);
            } else {
               /* this path is currently unreachable and hence might break easily... */
               LLVMValueRef newcoords4[5];
               newcoords4[0] = s4;
               newcoords4[1] = t4;
               newcoords4[2] = r4;
               newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
               newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);

               lp_build_sample_general(&bld4, sampler_index,
                                       op_type == LP_SAMPLER_OP_GATHER,
                                       newcoords4, offsets4,
                                       lod_positive4, lod_fpart4,
                                       ilevel04, ilevel14,
                                       texelout4);
            }
            for (unsigned j = 0; j < 4; j++) {
               texelouttmp[j][i] = texelout4[j];
            }
         }

         /* Stitch the per-quad results back into full-width vectors. */
         for (unsigned j = 0; j < 4; j++) {
            texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
         }
      }
   }

   if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
      apply_sampler_swizzle(&bld, texel_out);
   }

   /*
    * texel type can be a (32bit) int/uint (for pure int formats only),
    * however we are expected to always return floats (storage is untyped).
    */
   if (!bld.texel_type.floating) {
      unsigned chan;
      for (chan = 0; chan < 4; chan++) {
         texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
                                            lp_build_vec_type(gallivm, type), "");
      }
   }
}
4125
4126
4127 #define USE_TEX_FUNC_CALL 1
4128
4129 static inline void
get_target_info(enum pipe_texture_target target,unsigned * num_coords,unsigned * num_derivs,unsigned * num_offsets,unsigned * layer)4130 get_target_info(enum pipe_texture_target target,
4131 unsigned *num_coords, unsigned *num_derivs,
4132 unsigned *num_offsets, unsigned *layer)
4133 {
4134 unsigned dims = texture_dims(target);
4135 *num_coords = dims;
4136 *num_offsets = dims;
4137 *num_derivs = (target == PIPE_TEXTURE_CUBE ||
4138 target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
4139 *layer = has_layer_coord(target) ? 2: 0;
4140 if (target == PIPE_TEXTURE_CUBE_ARRAY) {
4141 /*
4142 * dims doesn't include r coord for cubes - this is handled
4143 * by layer instead, but need to fix up for cube arrays...
4144 */
4145 *layer = 3;
4146 *num_coords = 3;
4147 }
4148 }
4149
4150
4151 /**
4152 * Generate the function body for a texture sampling function.
4153 */
4154 static void
lp_build_sample_gen_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct lp_type type,LLVMTypeRef resources_type,LLVMTypeRef thread_data_type,unsigned texture_index,unsigned sampler_index,LLVMValueRef function,unsigned num_args,unsigned sample_key,bool has_aniso_filter_table)4155 lp_build_sample_gen_func(struct gallivm_state *gallivm,
4156 const struct lp_static_texture_state *static_texture_state,
4157 const struct lp_static_sampler_state *static_sampler_state,
4158 struct lp_sampler_dynamic_state *dynamic_state,
4159 struct lp_type type,
4160 LLVMTypeRef resources_type,
4161 LLVMTypeRef thread_data_type,
4162 unsigned texture_index,
4163 unsigned sampler_index,
4164 LLVMValueRef function,
4165 unsigned num_args,
4166 unsigned sample_key,
4167 bool has_aniso_filter_table)
4168 {
4169 LLVMBuilderRef old_builder;
4170 LLVMBasicBlockRef block;
4171 LLVMValueRef coords[5];
4172 LLVMValueRef offsets[3] = { NULL };
4173 LLVMValueRef lod = NULL;
4174 LLVMValueRef ms_index = NULL;
4175 LLVMValueRef resources_ptr;
4176 LLVMValueRef thread_data_ptr = NULL;
4177 LLVMValueRef aniso_filter_table = NULL;
4178 LLVMValueRef texel_out[4];
4179 struct lp_derivatives derivs;
4180 struct lp_derivatives *deriv_ptr = NULL;
4181 unsigned num_param = 0;
4182 unsigned num_coords, num_derivs, num_offsets, layer;
4183 bool need_cache = false;
4184
4185 const enum lp_sampler_lod_control lod_control =
4186 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK)
4187 >> LP_SAMPLER_LOD_CONTROL_SHIFT;
4188
4189 const enum lp_sampler_op_type op_type =
4190 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
4191
4192 get_target_info(static_texture_state->target,
4193 &num_coords, &num_derivs, &num_offsets, &layer);
4194
4195 /* lod query doesn't take a layer */
4196 if (layer && op_type == LP_SAMPLER_OP_LODQ)
4197 layer = 0;
4198
4199 if (dynamic_state->cache_ptr) {
4200 const struct util_format_description *format_desc;
4201 format_desc = util_format_description(static_texture_state->format);
4202 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
4203 need_cache = true;
4204 }
4205 }
4206
4207 /* "unpack" arguments */
4208 resources_ptr = LLVMGetParam(function, num_param++);
4209 if (has_aniso_filter_table)
4210 aniso_filter_table = LLVMGetParam(function, num_param++);
4211 if (need_cache) {
4212 thread_data_ptr = LLVMGetParam(function, num_param++);
4213 }
4214 for (unsigned i = 0; i < num_coords; i++) {
4215 coords[i] = LLVMGetParam(function, num_param++);
4216 }
4217 for (unsigned i = num_coords; i < 5; i++) {
4218 /* This is rather unfortunate... */
4219 coords[i] = lp_build_undef(gallivm, type);
4220 }
4221 if (layer) {
4222 coords[layer] = LLVMGetParam(function, num_param++);
4223 }
4224 if (sample_key & LP_SAMPLER_SHADOW) {
4225 coords[4] = LLVMGetParam(function, num_param++);
4226 }
4227 if (sample_key & LP_SAMPLER_FETCH_MS) {
4228 ms_index = LLVMGetParam(function, num_param++);
4229 }
4230 if (sample_key & LP_SAMPLER_OFFSETS) {
4231 for (unsigned i = 0; i < num_offsets; i++) {
4232 offsets[i] = LLVMGetParam(function, num_param++);
4233 }
4234 }
4235 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4236 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4237 lod = LLVMGetParam(function, num_param++);
4238 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4239 for (unsigned i = 0; i < num_derivs; i++) {
4240 derivs.ddx[i] = LLVMGetParam(function, num_param++);
4241 derivs.ddy[i] = LLVMGetParam(function, num_param++);
4242 }
4243 deriv_ptr = &derivs;
4244 }
4245
4246 assert(num_args == num_param);
4247
4248 /*
4249 * Function body
4250 */
4251
4252 old_builder = gallivm->builder;
4253 block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
4254 gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
4255 LLVMPositionBuilderAtEnd(gallivm->builder, block);
4256
4257 lp_build_sample_soa_code(gallivm,
4258 static_texture_state,
4259 static_sampler_state,
4260 dynamic_state,
4261 type,
4262 sample_key,
4263 texture_index,
4264 sampler_index,
4265 resources_type,
4266 resources_ptr,
4267 thread_data_type,
4268 thread_data_ptr,
4269 coords,
4270 offsets,
4271 deriv_ptr,
4272 lod,
4273 ms_index,
4274 aniso_filter_table,
4275 texel_out);
4276
4277 LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
4278
4279 LLVMDisposeBuilder(gallivm->builder);
4280 gallivm->builder = old_builder;
4281
4282 gallivm_verify_function(gallivm, function);
4283 }
4284
4285
4286 /**
4287 * Call the matching function for texture sampling.
4288 * If there's no match, generate a new one.
4289 */
4290 static void
lp_build_sample_soa_func(struct gallivm_state * gallivm,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,const struct lp_sampler_params * params,unsigned texture_index,unsigned sampler_index,LLVMValueRef * tex_ret)4291 lp_build_sample_soa_func(struct gallivm_state *gallivm,
4292 const struct lp_static_texture_state *static_texture_state,
4293 const struct lp_static_sampler_state *static_sampler_state,
4294 struct lp_sampler_dynamic_state *dynamic_state,
4295 const struct lp_sampler_params *params,
4296 unsigned texture_index, unsigned sampler_index,
4297 LLVMValueRef *tex_ret)
4298 {
4299 LLVMBuilderRef builder = gallivm->builder;
4300 LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
4301 LLVMGetInsertBlock(builder)));
4302 LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
4303 unsigned sample_key = params->sample_key;
4304 const LLVMValueRef *coords = params->coords;
4305 const LLVMValueRef *offsets = params->offsets;
4306 const struct lp_derivatives *derivs = params->derivs;
4307
4308 const enum lp_sampler_lod_control lod_control =
4309 (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
4310 LP_SAMPLER_LOD_CONTROL_SHIFT;
4311
4312 const enum lp_sampler_op_type op_type =
4313 (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> LP_SAMPLER_OP_TYPE_SHIFT;
4314
4315 unsigned num_coords, num_derivs, num_offsets, layer;
4316 get_target_info(static_texture_state->target,
4317 &num_coords, &num_derivs, &num_offsets, &layer);
4318
4319 /* lod query doesn't take a layer */
4320 if (layer && op_type == LP_SAMPLER_OP_LODQ)
4321 layer = 0;
4322
4323 bool need_cache = false;
4324 if (dynamic_state->cache_ptr) {
4325 const struct util_format_description *format_desc;
4326 format_desc = util_format_description(static_texture_state->format);
4327 if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
4328 need_cache = true;
4329 }
4330 }
4331
4332 /*
4333 * texture function matches are found by name.
4334 * Thus the name has to include both the texture and sampler unit
4335 * (which covers all static state) plus the actual texture function
4336 * (including things like offsets, shadow coord, lod control).
4337 * Additionally lod_property has to be included too.
4338 */
4339 char func_name[64];
4340 snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
4341 texture_index, sampler_index, sample_key);
4342
4343 LLVMValueRef function = LLVMGetNamedFunction(module, func_name);
4344 LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
4345 LLVMTypeRef ret_type;
4346 LLVMTypeRef val_type[4];
4347 unsigned num_param = 0;
4348
4349 /*
4350 * Generate the function prototype.
4351 */
4352
4353 arg_types[num_param++] = LLVMTypeOf(params->resources_ptr);
4354 if (params->aniso_filter_table)
4355 arg_types[num_param++] = LLVMTypeOf(params->aniso_filter_table);
4356 if (need_cache) {
4357 arg_types[num_param++] = LLVMTypeOf(params->thread_data_ptr);
4358 }
4359 for (unsigned i = 0; i < num_coords; i++) {
4360 arg_types[num_param++] = LLVMTypeOf(coords[0]);
4361 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
4362 }
4363 if (layer) {
4364 arg_types[num_param++] = LLVMTypeOf(coords[layer]);
4365 assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
4366 }
4367 if (sample_key & LP_SAMPLER_SHADOW) {
4368 arg_types[num_param++] = LLVMTypeOf(coords[0]);
4369 }
4370 if (sample_key & LP_SAMPLER_FETCH_MS) {
4371 arg_types[num_param++] = LLVMTypeOf(params->ms_index);
4372 }
4373 if (sample_key & LP_SAMPLER_OFFSETS) {
4374 for (unsigned i = 0; i < num_offsets; i++) {
4375 arg_types[num_param++] = LLVMTypeOf(offsets[0]);
4376 assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
4377 }
4378 }
4379 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4380 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4381 arg_types[num_param++] = LLVMTypeOf(params->lod);
4382 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4383 for (unsigned i = 0; i < num_derivs; i++) {
4384 arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
4385 arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
4386 assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
4387 assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
4388 }
4389 }
4390
4391 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
4392 lp_build_vec_type(gallivm, params->type);
4393 ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
4394 LLVMTypeRef function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
4395
4396 if (!function) {
4397 function = LLVMAddFunction(module, func_name, function_type);
4398
4399 for (unsigned i = 0; i < num_param; ++i) {
4400 if (LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
4401
4402 lp_add_function_attr(function, i + 1, LP_FUNC_ATTR_NOALIAS);
4403 }
4404 }
4405
4406 LLVMSetFunctionCallConv(function, LLVMFastCallConv);
4407 LLVMSetLinkage(function, LLVMInternalLinkage);
4408
4409 lp_build_sample_gen_func(gallivm,
4410 static_texture_state,
4411 static_sampler_state,
4412 dynamic_state,
4413 params->type,
4414 params->resources_type,
4415 params->thread_data_type,
4416 texture_index,
4417 sampler_index,
4418 function,
4419 num_param,
4420 sample_key,
4421 params->aniso_filter_table ? true : false);
4422 }
4423
4424 unsigned num_args = 0;
4425 args[num_args++] = params->resources_ptr;
4426 if (params->aniso_filter_table)
4427 args[num_args++] = params->aniso_filter_table;
4428 if (need_cache) {
4429 args[num_args++] = params->thread_data_ptr;
4430 }
4431 for (unsigned i = 0; i < num_coords; i++) {
4432 args[num_args++] = coords[i];
4433 }
4434 if (layer) {
4435 args[num_args++] = coords[layer];
4436 }
4437 if (sample_key & LP_SAMPLER_SHADOW) {
4438 args[num_args++] = coords[4];
4439 }
4440 if (sample_key & LP_SAMPLER_FETCH_MS) {
4441 args[num_args++] = params->ms_index;
4442 }
4443 if (sample_key & LP_SAMPLER_OFFSETS) {
4444 for (unsigned i = 0; i < num_offsets; i++) {
4445 args[num_args++] = offsets[i];
4446 }
4447 }
4448 if (lod_control == LP_SAMPLER_LOD_BIAS ||
4449 lod_control == LP_SAMPLER_LOD_EXPLICIT) {
4450 args[num_args++] = params->lod;
4451 } else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
4452 for (unsigned i = 0; i < num_derivs; i++) {
4453 args[num_args++] = derivs->ddx[i];
4454 args[num_args++] = derivs->ddy[i];
4455 }
4456 }
4457
4458 assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
4459
4460 *tex_ret = LLVMBuildCall2(builder, function_type, function, args, num_args, "");
4461 LLVMBasicBlockRef bb = LLVMGetInsertBlock(builder);
4462 LLVMValueRef inst = LLVMGetLastInstruction(bb);
4463 LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
4464 }
4465
4466
4467 /**
4468 * Build texture sampling code.
4469 * Either via a function call or inline it directly.
4470 */
4471 void
lp_build_sample_soa(const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_state,struct gallivm_state * gallivm,const struct lp_sampler_params * params)4472 lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
4473 const struct lp_static_sampler_state *static_sampler_state,
4474 struct lp_sampler_dynamic_state *dynamic_state,
4475 struct gallivm_state *gallivm,
4476 const struct lp_sampler_params *params)
4477 {
4478 bool use_tex_func = false;
4479
4480 /*
4481 * Do not use a function call if the sampling is "simple enough".
4482 * We define this by
4483 * a) format
4484 * b) no mips (either one level only or no mip filter)
4485 * No mips will definitely make the code smaller, though
4486 * the format requirement is a bit iffy - there's some (SoA) formats
4487 * which definitely generate less code. This does happen to catch
4488 * some important cases though which are hurt quite a bit by using
4489 * a call (though not really because of the call overhead but because
4490 * they are reusing the same texture unit with some of the same
4491 * parameters).
4492 * Ideally we'd let llvm recognize this stuff by doing IPO passes.
4493 */
4494
4495 if (USE_TEX_FUNC_CALL) {
4496 const struct util_format_description *format_desc =
4497 util_format_description(static_texture_state->format);
4498 const bool simple_format =
4499 (util_format_is_rgba8_variant(format_desc) &&
4500 format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
4501 const enum lp_sampler_op_type op_type =
4502 (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
4503 LP_SAMPLER_OP_TYPE_SHIFT;
4504 const bool simple_tex =
4505 op_type != LP_SAMPLER_OP_TEXTURE ||
4506 ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
4507 static_texture_state->level_zero_only == true) &&
4508 static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
4509
4510 use_tex_func = !(simple_format && simple_tex);
4511 }
4512
4513 if (use_tex_func) {
4514 LLVMValueRef tex_ret;
4515 lp_build_sample_soa_func(gallivm,
4516 static_texture_state,
4517 static_sampler_state,
4518 dynamic_state,
4519 params, params->texture_index,
4520 params->sampler_index, &tex_ret);
4521
4522 for (unsigned i = 0; i < 4; i++) {
4523 params->texel[i] =
4524 LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
4525 }
4526 } else {
4527 lp_build_sample_soa_code(gallivm,
4528 static_texture_state,
4529 static_sampler_state,
4530 dynamic_state,
4531 params->type,
4532 params->sample_key,
4533 params->texture_index,
4534 params->sampler_index,
4535 params->resources_type,
4536 params->resources_ptr,
4537 params->thread_data_type,
4538 params->thread_data_ptr,
4539 params->coords,
4540 params->offsets,
4541 params->derivs,
4542 params->lod,
4543 params->ms_index,
4544 params->aniso_filter_table,
4545 params->texel);
4546 }
4547 }
4548
4549
/**
 * Build code for texture size / sample-count queries (txq / resinfo).
 *
 * Writes into params->sizes_out (each a vector of params->int_type):
 *   [0..dims-1]  the (lod-minified) texture dimensions,
 *   [dims]       the layer count for array targets,
 *   [3]          the number of mip levels (sviewinfo with explicit lod only).
 * For samples_only queries only sizes_out[0] (the sample count) is written.
 */
void
lp_build_size_query_soa(struct gallivm_state *gallivm,
                        const struct lp_static_texture_state *static_state,
                        struct lp_sampler_dynamic_state *dynamic_state,
                        const struct lp_sampler_size_query_params *params)
{
   LLVMValueRef first_level = NULL;
   const unsigned num_lods = 1;
   LLVMTypeRef resources_type = params->resources_type;
   LLVMValueRef resources_ptr = params->resources_ptr;
   const unsigned texture_unit = params->texture_unit;
   const enum pipe_texture_target target = params->target;
   LLVMValueRef texture_unit_offset = params->texture_unit_offset;
   const struct util_format_description *format_desc =
      util_format_description(static_state->format);
   const struct util_format_description *res_format_desc =
      util_format_description(static_state->res_format);

   if (static_state->format == PIPE_FORMAT_NONE) {
      /*
       * If there's nothing bound, format is NONE, and we must return
       * all zero as mandated by d3d10 in this case.
       */
      LLVMValueRef zero = lp_build_const_vec(gallivm, params->int_type, 0.0F);
      for (unsigned chan = 0; chan < 4; chan++) {
         params->sizes_out[chan] = zero;
      }
      return;
   }

   /*
    * Do some sanity verification about bound texture and shader dcl target.
    * Not entirely sure what's possible but assume array/non-array
    * always compatible (probably not ok for OpenGL but d3d10 has no
    * distinction of arrays at the resource level).
    * Everything else looks bogus (though not entirely sure about rect/2d).
    * Currently disabled because it causes assertion failures if there's
    * nothing bound (or rather a dummy texture, not that this case would
    * return the right values).
    */
   if (0 && static_state->target != target) {
      if (static_state->target == PIPE_TEXTURE_1D)
         assert(target == PIPE_TEXTURE_1D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
         assert(target == PIPE_TEXTURE_1D);
      else if (static_state->target == PIPE_TEXTURE_2D)
         assert(target == PIPE_TEXTURE_2D_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
         assert(target == PIPE_TEXTURE_2D);
      else if (static_state->target == PIPE_TEXTURE_CUBE)
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
         assert(target == PIPE_TEXTURE_CUBE);
      else
         assert(0);
   }

   const unsigned dims = texture_dims(target);

   const bool has_array = has_layer_coord(target);

   assert(!params->int_type.floating);

   /* All per-dimension math below is done on a scalar int32x4 vector. */
   struct lp_build_context bld_int_vec4;
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));

   if (params->samples_only) {
      LLVMValueRef num_samples;
      if (params->ms && static_state->level_zero_only) {
         /* multisample never has levels. */
         /* NOTE: the last_level callback is repurposed here to fetch the
          * sample count - presumably the dynamic state stores it in that
          * slot for MS resources; confirm against the state setup code. */
         num_samples = dynamic_state->last_level(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
         num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                     bld_int_vec4.elem_type, "");
      } else {
         num_samples = lp_build_const_int32(gallivm, 0);
      }
      params->sizes_out[0] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_samples);
      return;
   }

   /* Level to query: explicit lod is relative to first_level. */
   LLVMValueRef lod;
   LLVMValueRef level = 0;
   if (params->explicit_lod) {
      /* FIXME: this needs to honor per-element lod */
      lod = LLVMBuildExtractElement(gallivm->builder, params->explicit_lod,
                                    lp_build_const_int32(gallivm, 0), "");
      first_level = get_first_level(gallivm, resources_type, resources_ptr,
                                    texture_unit, texture_unit_offset,
                                    static_state, dynamic_state);
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
   } else {
      lod = bld_int_vec4.zero;
   }

   /* Per-dimension vectors: raw size plus block-size factors used to
    * rescale dimensions when the view format's block layout differs
    * from the resource format's (e.g. compressed views). */
   LLVMValueRef size = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize = bld_int_vec4.undef;
   LLVMValueRef tex_blocksize_log2 = bld_int_vec4.undef;
   LLVMValueRef view_blocksize = bld_int_vec4.undef;

   uint32_t res_bw = res_format_desc->block.width;
   uint32_t res_bh = res_format_desc->block.height;
   uint32_t bw = format_desc->block.width;
   uint32_t bh = format_desc->block.height;

   /* only scale if the blocksizes are different. */
   if (res_bw == bw)
      res_bw = bw = 1;
   if (res_bh == bh)
      res_bh = bh = 1;

   /* x dimension (element 0) */
   LLVMValueRef tex_width = dynamic_state->width(gallivm,
                                                 resources_type,
                                                 resources_ptr,
                                                 texture_unit,
                                                 texture_unit_offset);
   size = LLVMBuildInsertElement(gallivm->builder, size,
                                 tex_width,
                                 lp_build_const_int32(gallivm, 0), "");
   tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                          lp_build_const_int32(gallivm, res_bw),
                                          lp_build_const_int32(gallivm, 0), "");
   tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                               lp_build_const_int32(gallivm, util_logbase2(res_bw)),
                                               lp_build_const_int32(gallivm, 0), "");
   view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                           lp_build_const_int32(gallivm, bw),
                                           lp_build_const_int32(gallivm, 0), "");
   /* y dimension (element 1) */
   if (dims >= 2) {
      LLVMValueRef tex_height =
         dynamic_state->height(gallivm, resources_type,
                               resources_ptr, texture_unit, texture_unit_offset);
      tex_height = LLVMBuildZExt(gallivm->builder, tex_height,
                                 bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_height,
                                    lp_build_const_int32(gallivm, 1), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, res_bh),
                                             lp_build_const_int32(gallivm, 1), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, util_logbase2(res_bh)),
                                                  lp_build_const_int32(gallivm, 1), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, bh),
                                              lp_build_const_int32(gallivm, 1), "");
   }

   /* z dimension (element 2); blocks are never compressed in depth. */
   if (dims >= 3) {
      LLVMValueRef tex_depth =
         dynamic_state->depth(gallivm, resources_type,
                              resources_ptr, texture_unit, texture_unit_offset);
      tex_depth = LLVMBuildZExt(gallivm->builder, tex_depth,
                                bld_int_vec4.elem_type, "");
      size = LLVMBuildInsertElement(gallivm->builder, size, tex_depth,
                                    lp_build_const_int32(gallivm, 2), "");
      tex_blocksize = LLVMBuildInsertElement(gallivm->builder, tex_blocksize,
                                             lp_build_const_int32(gallivm, 1),
                                             lp_build_const_int32(gallivm, 2), "");
      tex_blocksize_log2 = LLVMBuildInsertElement(gallivm->builder, tex_blocksize_log2,
                                                  lp_build_const_int32(gallivm, 0),
                                                  lp_build_const_int32(gallivm, 2), "");
      view_blocksize = LLVMBuildInsertElement(gallivm->builder, view_blocksize,
                                              lp_build_const_int32(gallivm, 1),
                                              lp_build_const_int32(gallivm, 2), "");
   }

   /* Shrink base size to the requested mip level, then rescale for the
    * view format's block size. */
   size = lp_build_minify(&bld_int_vec4, size, lod, true);
   size = lp_build_scale_view_dims(&bld_int_vec4, size, tex_blocksize,
                                   tex_blocksize_log2, view_blocksize);

   if (has_array) {
      /* Layer count is stored in the depth slot for array targets. */
      LLVMValueRef layers = dynamic_state->depth(gallivm, resources_type,
                                                 resources_ptr, texture_unit,
                                                 texture_unit_offset);
      layers = LLVMBuildZExt(gallivm->builder, layers,
                             bld_int_vec4.elem_type, "");
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
         /*
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
          * Could avoid this by passing in number of cubes instead of total
          * number of layers (might make things easier elsewhere too).
          */
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
      }
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
                                    lp_build_const_int32(gallivm, dims), "");
   }

   /*
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
    * if level is out of bounds (note this can't cover unbound texture
    * here, which also requires returning zero).
    */
   if (params->explicit_lod && params->is_sviewinfo) {
      LLVMValueRef last_level, out, out1;
      struct lp_build_context leveli_bld;

      /* everything is scalar for now */
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
      last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                  texture_unit, texture_unit_offset,
                                  static_state, dynamic_state);

      /* out-of-bounds if level < first_level or level > last_level */
      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
      out = lp_build_or(&leveli_bld, out, out1);
      if (num_lods == 1) {
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
      } else {
         /* TODO */
         assert(0);
      }
      /* zero out the size components where the level is out of bounds */
      size = lp_build_andnot(&bld_int_vec4, size, out);
   }

   /* Broadcast each computed component out to the shader's vector type. */
   unsigned i;
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
      params->sizes_out[i] =
         lp_build_extract_broadcast(gallivm, bld_int_vec4.type,
                                    params->int_type,
                                    size,
                                    lp_build_const_int32(gallivm, i));
   }
   if (params->is_sviewinfo) {
      for (; i < 4; i++) {
         params->sizes_out[i] = lp_build_const_vec(gallivm,
                                                   params->int_type, 0.0);
      }
   }

   /*
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
    * mips would be illegal.
    */
   if (params->is_sviewinfo && params->explicit_lod) {
      struct lp_build_context bld_int_scalar;
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));

      /* num_levels = last_level - first_level + 1 (or 1 if level 0 only) */
      LLVMValueRef num_levels;
      if (static_state->level_zero_only) {
         num_levels = bld_int_scalar.one;
      } else {
         LLVMValueRef last_level;
         last_level = get_last_level(gallivm, resources_type, resources_ptr,
                                     texture_unit, texture_unit_offset,
                                     static_state, dynamic_state);
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
         num_levels = lp_build_add(&bld_int_scalar, num_levels,
                                   bld_int_scalar.one);
      }
      params->sizes_out[3] =
         lp_build_broadcast(gallivm,
                            lp_build_vec_type(gallivm, params->int_type),
                            num_levels);
   }

   if (target == PIPE_BUFFER) {
      /* Clamp buffer size to the maximum addressable texel count. */
      struct lp_build_context bld_int;
      lp_build_context_init(&bld_int, gallivm, params->int_type);

      params->sizes_out[0] = lp_build_min(&bld_int, params->sizes_out[0],
                                          lp_build_const_int_vec(gallivm, params->int_type, LP_MAX_TEXEL_BUFFER_ELEMENTS));
   }
}
4822
4823
/**
 * Emit an atomic image operation on a vector of byte offsets.
 *
 * Only single-channel 32-bit formats (R32_UINT/R32_SINT/R32_FLOAT) support
 * atomics, and the op must match the format's int/float class; anything
 * else yields an all-zero result.  The vector is scalarized: a loop walks
 * the lanes, and each active lane (exec mask set and not out of bounds)
 * performs a scalar cmpxchg or atomicrmw at base_ptr + offset[lane].
 * The per-lane old values are gathered into atomic_result[0].
 */
static void
lp_build_do_atomic_soa(struct gallivm_state *gallivm,
                       const struct util_format_description *format_desc,
                       struct lp_type type,
                       LLVMValueRef exec_mask,
                       LLVMValueRef base_ptr,
                       LLVMValueRef offset,
                       LLVMValueRef out_of_bounds,
                       unsigned img_op,
                       LLVMAtomicRMWBinOp op,
                       const LLVMValueRef rgba_in[4],
                       const LLVMValueRef rgba2_in[4],
                       LLVMValueRef atomic_result[4])
{
   const enum pipe_format format = format_desc->format;

   bool valid = format == PIPE_FORMAT_R32_UINT ||
                format == PIPE_FORMAT_R32_SINT ||
                format == PIPE_FORMAT_R32_FLOAT;

   /* Reject op/format class mismatches (int op on float format etc.). */
   bool integer = format != PIPE_FORMAT_R32_FLOAT;
   if (img_op == LP_IMG_ATOMIC) {
      switch (op) {
      case LLVMAtomicRMWBinOpAdd:
      case LLVMAtomicRMWBinOpSub:
      case LLVMAtomicRMWBinOpAnd:
      case LLVMAtomicRMWBinOpNand:
      case LLVMAtomicRMWBinOpOr:
      case LLVMAtomicRMWBinOpXor:
      case LLVMAtomicRMWBinOpMax:
      case LLVMAtomicRMWBinOpMin:
      case LLVMAtomicRMWBinOpUMax:
      case LLVMAtomicRMWBinOpUMin:
         valid &= integer;
         break;
      case LLVMAtomicRMWBinOpFAdd:
      case LLVMAtomicRMWBinOpFSub:
#if LLVM_VERSION_MAJOR >= 15
      case LLVMAtomicRMWBinOpFMax:
      case LLVMAtomicRMWBinOpFMin:
#endif
         valid &= !integer;
         break;
      default:
         break;
      }
   } else {
      /* cmpxchg is only valid on the integer formats */
      valid &= integer;
   }

   if (!valid) {
      /* Unsupported format/op combo: single zero channel result. */
      atomic_result[0] = lp_build_zero(gallivm, type);
      return;
   }

   /* Scalar element type the atomic operates on. */
   LLVMTypeRef ref_type = (format == PIPE_FORMAT_R32_FLOAT) ?
      LLVMFloatTypeInContext(gallivm->context) :
      LLVMInt32TypeInContext(gallivm->context);

   /* Alloca accumulating the per-lane old values across loop iterations. */
   LLVMTypeRef atom_res_elem_type =
      LLVMVectorType(ref_type, type.length);
   LLVMValueRef atom_res = lp_build_alloca(gallivm, atom_res_elem_type, "");

   /* Turn the byte offsets into per-lane addresses. */
   offset = LLVMBuildGEP2(gallivm->builder,
                          LLVMInt8TypeInContext(gallivm->context),
                          base_ptr, &offset, 1, "");

   struct lp_build_loop_state loop_state;
   lp_build_loop_begin(&loop_state, gallivm, lp_build_const_int32(gallivm, 0));
   struct lp_build_if_state ifthen;
   LLVMValueRef cond;
   /* Only channel 0 participates; packed2 is the cmpxchg comparand. */
   LLVMValueRef packed = rgba_in[0], packed2 = rgba2_in[0];

   /* Lane is active if executing and not out of bounds. */
   LLVMValueRef should_store_mask =
      LLVMBuildAnd(gallivm->builder, exec_mask,
                   LLVMBuildNot(gallivm->builder, out_of_bounds, ""),
                   "store_mask");
   assert(exec_mask);

   cond = LLVMBuildICmp(gallivm->builder, LLVMIntNE, should_store_mask,
                        lp_build_const_int_vec(gallivm, type, 0), "");
   cond = LLVMBuildExtractElement(gallivm->builder, cond,
                                  loop_state.counter, "");
   lp_build_if(&ifthen, gallivm, cond);

   /* Extract this lane's operand and address. */
   LLVMValueRef data =
      LLVMBuildExtractElement(gallivm->builder, packed, loop_state.counter, "");
   LLVMValueRef cast_base_ptr =
      LLVMBuildExtractElement(gallivm->builder, offset, loop_state.counter, "");
   cast_base_ptr = LLVMBuildBitCast(gallivm->builder, cast_base_ptr,
                                    LLVMPointerType(ref_type, 0), "");
   data = LLVMBuildBitCast(gallivm->builder, data,
                           ref_type, "");

   if (img_op == LP_IMG_ATOMIC_CAS) {
      LLVMValueRef cas_src_ptr =
         LLVMBuildExtractElement(gallivm->builder, packed2,
                                 loop_state.counter, "");
      LLVMValueRef cas_src =
         LLVMBuildBitCast(gallivm->builder, cas_src_ptr,
                          ref_type, "");
      data = LLVMBuildAtomicCmpXchg(gallivm->builder, cast_base_ptr, data,
                                    cas_src,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    LLVMAtomicOrderingSequentiallyConsistent,
                                    false);
      /* cmpxchg returns {old value, success}; keep the old value. */
      data = LLVMBuildExtractValue(gallivm->builder, data, 0, "");
   } else {
      data = LLVMBuildAtomicRMW(gallivm->builder, op,
                                cast_base_ptr, data,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                false);
   }

   /* Insert the lane's old value into the accumulator vector. */
   LLVMValueRef temp_res =
      LLVMBuildLoad2(gallivm->builder, atom_res_elem_type, atom_res, "");
   temp_res = LLVMBuildInsertElement(gallivm->builder, temp_res, data,
                                     loop_state.counter, "");
   LLVMBuildStore(gallivm->builder, temp_res, atom_res);

   lp_build_endif(&ifthen);
   lp_build_loop_end_cond(&loop_state,
                          lp_build_const_int32(gallivm, type.length),
                          NULL, LLVMIntUGE);
   atomic_result[0] = LLVMBuildLoad2(gallivm->builder, atom_res_elem_type,
                                     atom_res, "");
}
4951
4952
4953 static void
lp_build_img_op_no_format(struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef outdata[4])4954 lp_build_img_op_no_format(struct gallivm_state *gallivm,
4955 const struct lp_img_params *params,
4956 LLVMValueRef outdata[4])
4957 {
4958 /*
4959 * If there's nothing bound, format is NONE, and we must return
4960 * all zero as mandated by d3d10 in this case.
4961 */
4962 if (params->img_op != LP_IMG_STORE) {
4963 LLVMValueRef zero = lp_build_zero(gallivm, params->type);
4964 for (unsigned chan = 0; chan < (params->img_op == LP_IMG_LOAD ? 4 : 1);
4965 chan++) {
4966 outdata[chan] = zero;
4967 }
4968 }
4969 }
4970
4971
/**
 * Build SoA code for an image load, store or atomic operation.
 *
 * \param static_texture_state  compile-time image view state (format, target,
 *                              tiling, ...)
 * \param dynamic_state         callbacks fetching per-image runtime state
 *                              (base pointer, strides, dimensions)
 * \param params                the op to perform (img_op/op), coords,
 *                              execution mask and input data
 * \param outdata               receives the result channels for loads and
 *                              atomics; for sparse loads outdata[4]
 *                              additionally receives the residency value
 */
void
lp_build_img_op_soa(const struct lp_static_texture_state *static_texture_state,
                    struct lp_sampler_dynamic_state *dynamic_state,
                    struct gallivm_state *gallivm,
                    const struct lp_img_params *params,
                    LLVMValueRef *outdata)
{
   const enum pipe_texture_target target = params->target;
   const unsigned dims = texture_dims(target);
   const struct util_format_description *format_desc =
      util_format_description(static_texture_state->format);
   const struct util_format_description *res_format_desc =
      util_format_description(static_texture_state->res_format);
   LLVMValueRef x = params->coords[0], y = params->coords[1],
      z = params->coords[2];
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;

   /** regular scalar int type */
   struct lp_type int_coord_type = lp_uint_type(params->type);
   struct lp_build_context int_coord_bld;
   lp_build_context_init(&int_coord_bld, gallivm, int_coord_type);

   /* Nothing bound: d3d10 mandates all-zero results for loads/atomics. */
   if (static_texture_state->format == PIPE_FORMAT_NONE) {
      lp_build_img_op_no_format(gallivm, params, outdata);
      return;

   }

   /* Fetch dynamic per-image state: layout strides, the base pointer and
    * the dimensions used for bounds checking below.
    */
   LLVMValueRef row_stride = dynamic_state->row_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef img_stride = dynamic_state->img_stride(gallivm,
                                                       params->resources_type,
                                                       params->resources_ptr,
                                                       params->image_index, NULL, NULL);
   LLVMValueRef base_ptr = dynamic_state->base_ptr(gallivm,
                                                   params->resources_type,
                                                   params->resources_ptr,
                                                   params->image_index, NULL);
   LLVMValueRef width = dynamic_state->width(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   LLVMValueRef height = dynamic_state->height(gallivm,
                                               params->resources_type,
                                               params->resources_ptr,
                                               params->image_index, NULL);
   /* height/depth come back narrower than the coord element type. */
   height = LLVMBuildZExt(gallivm->builder, height,
                          int_coord_bld.elem_type, "");
   LLVMValueRef depth = dynamic_state->depth(gallivm,
                                             params->resources_type,
                                             params->resources_ptr,
                                             params->image_index, NULL);
   depth = LLVMBuildZExt(gallivm->builder, depth,
                         int_coord_bld.elem_type, "");
   bool layer_coord = has_layer_coord(target);

   /* Rescale view dimensions when the view format's block size differs
    * from the resource format's, then broadcast scalars to coord vectors.
    */
   width = lp_build_scale_view_dim(gallivm, width, res_format_desc->block.width,
                                   format_desc->block.width);
   width = lp_build_broadcast_scalar(&int_coord_bld, width);
   if (dims >= 2) {
      height = lp_build_scale_view_dim(gallivm, height, res_format_desc->block.height,
                                       format_desc->block.height);
      height = lp_build_broadcast_scalar(&int_coord_bld, height);
      row_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, row_stride);
   }
   if (dims >= 3 || layer_coord) {
      depth = lp_build_broadcast_scalar(&int_coord_bld, depth);
      img_stride_vec = lp_build_broadcast_scalar(&int_coord_bld, img_stride);
   }

   /* Per-lane out-of-bounds mask: set for any coordinate >= its dimension
    * (coords are treated as unsigned, so negative values are also caught).
    */
   LLVMValueRef out_of_bounds = int_coord_bld.zero;
   LLVMValueRef out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
   out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);

   if (dims >= 2) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }
   if (dims >= 3 || layer_coord) {
      out1 = lp_build_cmp(&int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
      out_of_bounds = lp_build_or(&int_coord_bld, out_of_bounds, out1);
   }

   /* Compute the per-lane byte offset of each texel (tiled layouts need
    * their own offset computation; i/j are the sub-block coordinates).
    */
   LLVMValueRef offset, i, j;
   if (static_texture_state->tiled) {
      lp_build_tiled_sample_offset(&int_coord_bld,
                                   format_desc->format,
                                   static_texture_state,
                                   x, y, z, width, height, img_stride_vec,
                                   &offset, &i, &j);
   } else {
      lp_build_sample_offset(&int_coord_bld,
                             format_desc,
                             x, y, z, row_stride_vec, img_stride_vec,
                             &offset, &i, &j);
   }

   /* Multisampled image: fold the sample index into the offset.
    * NOTE(review): the sample count is fetched through the last_level
    * callback — presumably that slot is reused for num_samples on
    * level_zero_only MSAA resources; confirm against the state setup.
    */
   if (params->ms_index && static_texture_state->level_zero_only) {
      LLVMValueRef num_samples = dynamic_state->last_level(gallivm,
                                                           params->resources_type,
                                                           params->resources_ptr,
                                                           params->image_index, NULL);
      num_samples = LLVMBuildZExt(gallivm->builder, num_samples,
                                  int_coord_bld.elem_type, "");
      LLVMValueRef sample_stride = dynamic_state->sample_stride(gallivm,
                                                                params->resources_type,
                                                                params->resources_ptr,
                                                                params->image_index, NULL);
      lp_build_sample_ms_offset(&int_coord_bld,
                                params->ms_index, num_samples,
                                sample_stride, &offset,
                                &out_of_bounds);
   }
   if (params->img_op == LP_IMG_LOAD || params->img_op == LP_IMG_LOAD_SPARSE) {
      struct lp_type texel_type = lp_build_texel_type(params->type, format_desc);

      /* Sparse load on a tiled resource: also gather residency info into
       * outdata[4], using the absolute offset within the resource.
       */
      if (params->img_op == LP_IMG_LOAD_SPARSE && static_texture_state->tiled) {
         LLVMValueRef base_offset =
            dynamic_state->base_offset(gallivm, params->resources_type,
                                       params->resources_ptr, params->image_index, NULL);
         base_offset = lp_build_broadcast_scalar(&int_coord_bld, base_offset);

         LLVMValueRef full_offset = LLVMBuildAdd(gallivm->builder, base_offset, offset, "");

         lp_build_gather_resident(&int_coord_bld, dynamic_state,
                                  params->resources_type, params->resources_ptr,
                                  full_offset, &outdata[4]);
      }

      /* Clamp out-of-bounds lanes to offset 0 so the fetch stays safe;
       * their results are overwritten with zero below anyway.
       */
      offset = lp_build_andnot(&int_coord_bld, offset, out_of_bounds);
      struct lp_build_context texel_bld;
      lp_build_context_init(&texel_bld, gallivm, texel_type);
      lp_build_fetch_rgba_soa(gallivm,
                              format_desc,
                              texel_type, true,
                              base_ptr, offset,
                              i, j,
                              NULL,
                              outdata);

      /* Out-of-bounds lanes read as (0,0,0,0) — except alpha reads as 1
       * for formats whose swizzle hardwires alpha to one.
       */
      for (unsigned chan = 0; chan < 3; chan++) {
         outdata[chan] = lp_build_select(&texel_bld, out_of_bounds,
                                         texel_bld.zero, outdata[chan]);
      }
      if (format_desc->swizzle[3] == PIPE_SWIZZLE_1) {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.one, outdata[3]);
      } else {
         outdata[3] = lp_build_select(&texel_bld, out_of_bounds,
                                      texel_bld.zero, outdata[3]);
      }
   } else if (params->img_op == LP_IMG_STORE) {
      /* Store path: masked by exec_mask and out_of_bounds internally. */
      lp_build_store_rgba_soa(gallivm, format_desc, params->type,
                              params->exec_mask, base_ptr, offset,
                              out_of_bounds, params->indata);
   } else {
      /* Atomic path: indata2 carries the comparator for cmpxchg ops. */
      lp_build_do_atomic_soa(gallivm, format_desc, params->type,
                             params->exec_mask, base_ptr, offset,
                             out_of_bounds, params->img_op, params->op,
                             params->indata, params->indata2, outdata);
   }
}
5136
5137
5138 /*
 * These functions are for indirect texture access support.
5140 *
5141 * Indirect textures are implemented using a switch statement, that
5142 * takes the texture index and jumps to the sampler functions for
5143 * that texture unit.
5144 */
5145
5146 /*
5147 * Initialise an indexed sampler switch block.
5148 *
5149 * This sets up the switch_info state and adds the LLVM flow control pieces.
5150 */
5151 void
lp_build_sample_array_init_soa(struct lp_build_sample_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_sampler_params * params,LLVMValueRef idx,unsigned base,unsigned range)5152 lp_build_sample_array_init_soa(struct lp_build_sample_array_switch *switch_info,
5153 struct gallivm_state *gallivm,
5154 const struct lp_sampler_params *params,
5155 LLVMValueRef idx,
5156 unsigned base, unsigned range)
5157 {
5158 switch_info->gallivm = gallivm;
5159 switch_info->params = *params;
5160 switch_info->base = base;
5161 switch_info->range = range;
5162
5163 /* for generating the switch functions we don't want the texture index
5164 * offset
5165 */
5166 switch_info->params.texture_index_offset = 0;
5167
5168 LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
5169 switch_info->merge_ref = lp_build_insert_new_block(gallivm, "texmerge");
5170
5171 switch_info->switch_ref = LLVMBuildSwitch(gallivm->builder, idx,
5172 switch_info->merge_ref,
5173 range - base);
5174
5175 LLVMTypeRef val_type[4];
5176 val_type[0] = val_type[1] = val_type[2] = val_type[3] =
5177 lp_build_vec_type(gallivm, params->type);
5178
5179 LLVMTypeRef ret_type =
5180 LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
5181
5182 LLVMValueRef undef_val = LLVMGetUndef(ret_type);
5183
5184 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5185
5186 switch_info->phi = LLVMBuildPhi(gallivm->builder, ret_type, "");
5187 LLVMAddIncoming(switch_info->phi, &undef_val, &initial_block, 1);
5188 }
5189
5190
5191 /*
5192 * Add an individual entry to the indirect texture switch.
5193 *
5194 * This builds the sample function and links a case for it into the switch
5195 * statement.
5196 */
5197 void
lp_build_sample_array_case_soa(struct lp_build_sample_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,const struct lp_static_sampler_state * static_sampler_state,struct lp_sampler_dynamic_state * dynamic_texture_state)5198 lp_build_sample_array_case_soa(struct lp_build_sample_array_switch *switch_info,
5199 int idx,
5200 const struct lp_static_texture_state *static_texture_state,
5201 const struct lp_static_sampler_state *static_sampler_state,
5202 struct lp_sampler_dynamic_state *dynamic_texture_state)
5203 {
5204 struct gallivm_state *gallivm = switch_info->gallivm;
5205 LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "texblock");
5206
5207 LLVMAddCase(switch_info->switch_ref,
5208 LLVMConstInt(LLVMInt32TypeInContext(gallivm->context), idx, 0),
5209 this_block);
5210 LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
5211
5212 LLVMValueRef tex_ret;
5213 lp_build_sample_soa_func(gallivm, static_texture_state,
5214 static_sampler_state, dynamic_texture_state,
5215 &switch_info->params, idx, idx, &tex_ret);
5216
5217 LLVMAddIncoming(switch_info->phi, &tex_ret, &this_block, 1);
5218 LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
5219 }
5220
5221
5222 /*
5223 * Finish a switch statement.
5224 *
 * This extracts the results from the switch.
5226 */
5227 void
lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch * switch_info)5228 lp_build_sample_array_fini_soa(struct lp_build_sample_array_switch *switch_info)
5229 {
5230 struct gallivm_state *gallivm = switch_info->gallivm;
5231
5232 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5233 for (unsigned i = 0; i < 4; i++) {
5234 switch_info->params.texel[i] =
5235 LLVMBuildExtractValue(gallivm->builder, switch_info->phi, i, "");
5236 }
5237 }
5238
5239
5240 void
lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch * switch_info,struct gallivm_state * gallivm,const struct lp_img_params * params,LLVMValueRef idx,unsigned base,unsigned range)5241 lp_build_image_op_switch_soa(struct lp_build_img_op_array_switch *switch_info,
5242 struct gallivm_state *gallivm,
5243 const struct lp_img_params *params,
5244 LLVMValueRef idx,
5245 unsigned base, unsigned range)
5246 {
5247 switch_info->gallivm = gallivm;
5248 switch_info->params = *params;
5249 switch_info->base = base;
5250 switch_info->range = range;
5251
5252 /* for generating the switch functions we don't want the texture index
5253 * offset
5254 */
5255 switch_info->params.image_index_offset = 0;
5256
5257 LLVMBasicBlockRef initial_block = LLVMGetInsertBlock(gallivm->builder);
5258 switch_info->merge_ref = lp_build_insert_new_block(gallivm, "imgmerge");
5259
5260 switch_info->switch_ref =
5261 LLVMBuildSwitch(gallivm->builder, idx,
5262 switch_info->merge_ref, range - base);
5263
5264 if (params->img_op != LP_IMG_STORE) {
5265 LLVMTypeRef ret_type = lp_build_vec_type(gallivm, params->type);
5266 LLVMValueRef undef_val = LLVMGetUndef(ret_type);
5267
5268 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5269
5270 for (unsigned i = 0; i < ((params->img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5271 switch_info->phi[i] = LLVMBuildPhi(gallivm->builder, ret_type, "");
5272 LLVMAddIncoming(switch_info->phi[i], &undef_val, &initial_block, 1);
5273 }
5274 }
5275 }
5276
5277
5278 void
lp_build_image_op_array_case(struct lp_build_img_op_array_switch * switch_info,int idx,const struct lp_static_texture_state * static_texture_state,struct lp_sampler_dynamic_state * dynamic_state)5279 lp_build_image_op_array_case(struct lp_build_img_op_array_switch *switch_info,
5280 int idx,
5281 const struct lp_static_texture_state *static_texture_state,
5282 struct lp_sampler_dynamic_state *dynamic_state)
5283 {
5284 struct gallivm_state *gallivm = switch_info->gallivm;
5285 LLVMBasicBlockRef this_block = lp_build_insert_new_block(gallivm, "img");
5286 LLVMValueRef tex_ret[4];
5287
5288 LLVMAddCase(switch_info->switch_ref,
5289 lp_build_const_int32(gallivm, idx), this_block);
5290 LLVMPositionBuilderAtEnd(gallivm->builder, this_block);
5291
5292 switch_info->params.image_index = idx;
5293
5294 lp_build_img_op_soa(static_texture_state, dynamic_state,
5295 switch_info->gallivm, &switch_info->params, tex_ret);
5296
5297 if (switch_info->params.img_op != LP_IMG_STORE) {
5298 for (unsigned i = 0;
5299 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5300 tex_ret[i] =
5301 LLVMBuildBitCast(gallivm->builder, tex_ret[i],
5302 lp_build_vec_type(gallivm,
5303 switch_info->params.type), "");
5304 }
5305
5306 this_block = LLVMGetInsertBlock(gallivm->builder);
5307 for (unsigned i = 0;
5308 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5309 LLVMAddIncoming(switch_info->phi[i], &tex_ret[i], &this_block, 1);
5310 }
5311 }
5312 LLVMBuildBr(gallivm->builder, switch_info->merge_ref);
5313 }
5314
5315
5316 void
lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch * switch_info)5317 lp_build_image_op_array_fini_soa(struct lp_build_img_op_array_switch *switch_info)
5318 {
5319 struct gallivm_state *gallivm = switch_info->gallivm;
5320
5321 LLVMPositionBuilderAtEnd(gallivm->builder, switch_info->merge_ref);
5322
5323 if (switch_info->params.img_op != LP_IMG_STORE) {
5324 for (unsigned i = 0;
5325 i < ((switch_info->params.img_op == LP_IMG_LOAD) ? 4 : 1); i++) {
5326 switch_info->params.outdata[i] = switch_info->phi[i];
5327 }
5328 }
5329 }
5330