1 /**************************************************************************
2 *
3 * Copyright 2009 VMware, Inc.
4 * All Rights Reserved.
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
16 * of the Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 *
26 **************************************************************************/
27
28 /**
29 * @file
30 * Texture sampling -- common code.
31 *
32 * @author Jose Fonseca <[email protected]>
33 */
34
35 #include "pipe/p_defines.h"
36 #include "pipe/p_state.h"
37 #include "util/format/u_format.h"
38 #include "util/u_math.h"
39 #include "util/u_cpu_detect.h"
40 #include "lp_bld_arit.h"
41 #include "lp_bld_const.h"
42 #include "lp_bld_debug.h"
43 #include "lp_bld_printf.h"
44 #include "lp_bld_flow.h"
45 #include "lp_bld_sample.h"
46 #include "lp_bld_swizzle.h"
47 #include "lp_bld_type.h"
48 #include "lp_bld_logic.h"
49 #include "lp_bld_pack.h"
50 #include "lp_bld_quad.h"
51 #include "lp_bld_bitarit.h"
52
53
54 /*
55 * Bri-linear factor. Should be greater than one.
56 */
57 #define BRILINEAR_FACTOR 2
58
59
60 /**
61 * Does the given texture wrap mode allow sampling the texture border color?
62 * XXX maybe move this into gallium util code.
63 */
64 bool
lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,enum pipe_tex_filter min_img_filter,enum pipe_tex_filter mag_img_filter)65 lp_sampler_wrap_mode_uses_border_color(enum pipe_tex_wrap mode,
66 enum pipe_tex_filter min_img_filter,
67 enum pipe_tex_filter mag_img_filter)
68 {
69 switch (mode) {
70 case PIPE_TEX_WRAP_REPEAT:
71 case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
72 case PIPE_TEX_WRAP_MIRROR_REPEAT:
73 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
74 return false;
75 case PIPE_TEX_WRAP_CLAMP:
76 case PIPE_TEX_WRAP_MIRROR_CLAMP:
77 if (min_img_filter == PIPE_TEX_FILTER_NEAREST &&
78 mag_img_filter == PIPE_TEX_FILTER_NEAREST) {
79 return false;
80 } else {
81 return true;
82 }
83 case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
84 case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
85 return true;
86 default:
87 assert(0 && "unexpected wrap mode");
88 return false;
89 }
90 }
91
92
93 /**
94 * Initialize lp_sampler_static_texture_state object with the gallium
95 * texture/sampler_view state (this contains the parts which are
96 * considered static).
97 */
98 void
lp_sampler_static_texture_state(struct lp_static_texture_state * state,const struct pipe_sampler_view * view)99 lp_sampler_static_texture_state(struct lp_static_texture_state *state,
100 const struct pipe_sampler_view *view)
101 {
102 memset(state, 0, sizeof *state);
103
104 if (!view || !view->texture)
105 return;
106
107 const struct pipe_resource *texture = view->texture;
108
109 state->format = view->format;
110 state->res_format = texture->format;
111 state->swizzle_r = view->swizzle_r;
112 state->swizzle_g = view->swizzle_g;
113 state->swizzle_b = view->swizzle_b;
114 state->swizzle_a = view->swizzle_a;
115 assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
116 assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
117 assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
118 assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
119
120 /* check if it is a tex2d created from buf */
121 if (view->is_tex2d_from_buf)
122 state->target = PIPE_TEXTURE_2D;
123 else
124 state->target = view->target;
125
126 state->res_target = texture->target;
127
128 state->pot_width = util_is_power_of_two_or_zero(texture->width0);
129 state->pot_height = util_is_power_of_two_or_zero(texture->height0);
130 state->pot_depth = util_is_power_of_two_or_zero(texture->depth0);
131 state->level_zero_only = !view->u.tex.last_level;
132 state->tiled = !!(texture->flags & PIPE_RESOURCE_FLAG_SPARSE);
133 if (state->tiled)
134 state->tiled_samples = texture->nr_samples;
135
136 /*
137 * the layer / element / level parameters are all either dynamic
138 * state or handled transparently wrt execution.
139 */
140 }
141
142
143 /**
144 * Initialize lp_sampler_static_texture_state object with the gallium
145 * texture/sampler_view state (this contains the parts which are
146 * considered static).
147 */
148 void
lp_sampler_static_texture_state_image(struct lp_static_texture_state * state,const struct pipe_image_view * view)149 lp_sampler_static_texture_state_image(struct lp_static_texture_state *state,
150 const struct pipe_image_view *view)
151 {
152 memset(state, 0, sizeof *state);
153
154 if (!view || !view->resource)
155 return;
156
157 const struct pipe_resource *resource = view->resource;
158
159 state->format = view->format;
160 state->res_format = resource->format;
161 state->swizzle_r = PIPE_SWIZZLE_X;
162 state->swizzle_g = PIPE_SWIZZLE_Y;
163 state->swizzle_b = PIPE_SWIZZLE_Z;
164 state->swizzle_a = PIPE_SWIZZLE_W;
165 assert(state->swizzle_r < PIPE_SWIZZLE_NONE);
166 assert(state->swizzle_g < PIPE_SWIZZLE_NONE);
167 assert(state->swizzle_b < PIPE_SWIZZLE_NONE);
168 assert(state->swizzle_a < PIPE_SWIZZLE_NONE);
169
170 state->target = resource->target;
171 state->res_target = resource->target;
172 state->pot_width = util_is_power_of_two_or_zero(resource->width0);
173 state->pot_height = util_is_power_of_two_or_zero(resource->height0);
174 state->pot_depth = util_is_power_of_two_or_zero(resource->depth0);
175 state->level_zero_only = view->u.tex.level == 0;
176 state->tiled = !!(resource->flags & PIPE_RESOURCE_FLAG_SPARSE);
177 if (state->tiled) {
178 state->tiled_samples = resource->nr_samples;
179 if (view->u.tex.is_2d_view_of_3d)
180 state->target = PIPE_TEXTURE_2D;
181 }
182
183 /*
184 * the layer / element / level parameters are all either dynamic
185 * state or handled transparently wrt execution.
186 */
187 }
188
189
190 /**
191 * Initialize lp_sampler_static_sampler_state object with the gallium sampler
192 * state (this contains the parts which are considered static).
193 */
194 void
lp_sampler_static_sampler_state(struct lp_static_sampler_state * state,const struct pipe_sampler_state * sampler)195 lp_sampler_static_sampler_state(struct lp_static_sampler_state *state,
196 const struct pipe_sampler_state *sampler)
197 {
198 memset(state, 0, sizeof *state);
199
200 if (!sampler)
201 return;
202
203 /*
204 * We don't copy sampler state over unless it is actually enabled, to avoid
205 * spurious recompiles, as the sampler static state is part of the shader
206 * key.
207 *
208 * Ideally gallium frontends or cso_cache module would make all state
209 * canonical, but until that happens it's better to be safe than sorry here.
210 *
211 * XXX: Actually there's much more than can be done here, especially
212 * regarding 1D/2D/3D/CUBE textures, wrap modes, etc.
213 */
214
215 state->wrap_s = sampler->wrap_s;
216 state->wrap_t = sampler->wrap_t;
217 state->wrap_r = sampler->wrap_r;
218 state->min_img_filter = sampler->min_img_filter;
219 state->mag_img_filter = sampler->mag_img_filter;
220 state->min_mip_filter = sampler->min_mip_filter;
221 state->seamless_cube_map = sampler->seamless_cube_map;
222 state->reduction_mode = sampler->reduction_mode;
223 state->aniso = sampler->max_anisotropy > 1.0f;
224
225 if (sampler->max_lod > 0.0f) {
226 state->max_lod_pos = 1;
227 }
228
229 if (sampler->lod_bias != 0.0f) {
230 state->lod_bias_non_zero = 1;
231 }
232
233 if (state->min_mip_filter != PIPE_TEX_MIPFILTER_NONE ||
234 state->min_img_filter != state->mag_img_filter) {
235
236 /* If min_lod == max_lod we can greatly simplify mipmap selection.
237 * This is a case that occurs during automatic mipmap generation.
238 */
239 if (sampler->min_lod == sampler->max_lod) {
240 state->min_max_lod_equal = 1;
241 } else {
242 if (sampler->min_lod > 0.0f) {
243 state->apply_min_lod = 1;
244 }
245
246 /*
247 * XXX this won't do anything with the mesa state tracker which always
248 * sets max_lod to not more than actually present mip maps...
249 */
250 if (sampler->max_lod < (PIPE_MAX_TEXTURE_LEVELS - 1)) {
251 state->apply_max_lod = 1;
252 }
253 }
254 }
255
256 state->compare_mode = sampler->compare_mode;
257 if (sampler->compare_mode != PIPE_TEX_COMPARE_NONE) {
258 state->compare_func = sampler->compare_func;
259 }
260
261 state->normalized_coords = !sampler->unnormalized_coords;
262 }
263
264
265 /* build aniso pmin value */
266 static LLVMValueRef
lp_build_pmin(struct lp_build_sample_context * bld,LLVMValueRef first_level,LLVMValueRef s,LLVMValueRef t,LLVMValueRef max_aniso)267 lp_build_pmin(struct lp_build_sample_context *bld,
268 LLVMValueRef first_level,
269 LLVMValueRef s,
270 LLVMValueRef t,
271 LLVMValueRef max_aniso)
272 {
273 struct gallivm_state *gallivm = bld->gallivm;
274 LLVMBuilderRef builder = bld->gallivm->builder;
275 struct lp_build_context *coord_bld = &bld->coord_bld;
276 struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
277 struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
278 struct lp_build_context *pmin_bld = &bld->lodf_bld;
279 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
280 LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
281 LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
282 LLVMValueRef ddx_ddy = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
283 LLVMValueRef int_size, float_size;
284 const unsigned length = coord_bld->type.length;
285 const unsigned num_quads = length / 4;
286 const bool pmin_per_quad = pmin_bld->type.length != length;
287
288 int_size = lp_build_minify(int_size_bld, bld->int_size, first_level, true);
289 float_size = lp_build_int_to_float(float_size_bld, int_size);
290 max_aniso = lp_build_broadcast_scalar(coord_bld, max_aniso);
291 max_aniso = lp_build_mul(coord_bld, max_aniso, max_aniso);
292
293 static const unsigned char swizzle01[] = { /* no-op swizzle */
294 0, 1,
295 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
296 };
297 static const unsigned char swizzle23[] = {
298 2, 3,
299 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
300 };
301 LLVMValueRef ddx_ddys, ddx_ddyt, floatdim, shuffles[LP_MAX_VECTOR_LENGTH / 4];
302
303 for (unsigned i = 0; i < num_quads; i++) {
304 shuffles[i*4+0] = shuffles[i*4+1] = index0;
305 shuffles[i*4+2] = shuffles[i*4+3] = index1;
306 }
307 floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
308 LLVMConstVector(shuffles, length), "");
309 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, floatdim);
310
311 ddx_ddy = lp_build_mul(coord_bld, ddx_ddy, ddx_ddy);
312
313 ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle01);
314 ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy, swizzle23);
315
316 LLVMValueRef px2_py2 = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);
317
318 static const unsigned char swizzle0[] = { /* no-op swizzle */
319 0, LP_BLD_SWIZZLE_DONTCARE,
320 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
321 };
322 static const unsigned char swizzle1[] = {
323 1, LP_BLD_SWIZZLE_DONTCARE,
324 LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
325 };
326 LLVMValueRef px2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle0);
327 LLVMValueRef py2 = lp_build_swizzle_aos(coord_bld, px2_py2, swizzle1);
328
329 LLVMValueRef pmax2 = lp_build_max(coord_bld, px2, py2);
330 LLVMValueRef pmin2 = lp_build_min(coord_bld, px2, py2);
331
332 LLVMValueRef temp = lp_build_mul(coord_bld, pmin2, max_aniso);
333
334 LLVMValueRef comp = lp_build_compare(gallivm, coord_bld->type, PIPE_FUNC_GREATER,
335 pmin2, temp);
336
337 LLVMValueRef pmin2_alt = lp_build_div(coord_bld, pmax2, max_aniso);
338
339 pmin2 = lp_build_select(coord_bld, comp, pmin2_alt, pmin2);
340
341 if (pmin_per_quad)
342 pmin2 = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
343 pmin_bld->type, pmin2, 0);
344 else
345 pmin2 = lp_build_swizzle_scalar_aos(pmin_bld, pmin2, 0, 4);
346 return pmin2;
347 }
348
349
/**
 * Generate code to compute coordinate gradient (rho).
 * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y
 *
 * The resulting rho has bld->levelf format (per quad or per element).
 *
 * When bld->no_rho_approx is set (and dims > 1), rho is computed exactly
 * from the sum of squared derivatives and returned SQUARED (the sqrt is
 * skipped); otherwise the cheaper max(|ddx|,|ddy|) approximation is used.
 */
static LLVMValueRef
lp_build_rho(struct lp_build_sample_context *bld,
             LLVMValueRef first_level,
             LLVMValueRef s,
             LLVMValueRef t,
             LLVMValueRef r,
             const struct lp_derivatives *derivs)
{
   struct gallivm_state *gallivm = bld->gallivm;
   struct lp_build_context *int_size_bld = &bld->int_size_in_bld;
   struct lp_build_context *float_size_bld = &bld->float_size_in_bld;
   struct lp_build_context *float_bld = &bld->float_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   struct lp_build_context *rho_bld = &bld->lodf_bld;
   const unsigned dims = bld->dims;
   LLVMValueRef ddx_ddy[2] = {NULL};
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef index0 = LLVMConstInt(i32t, 0, 0);
   LLVMValueRef index1 = LLVMConstInt(i32t, 1, 0);
   LLVMValueRef index2 = LLVMConstInt(i32t, 2, 0);
   LLVMValueRef rho_vec;
   LLVMValueRef rho;
   unsigned length = coord_bld->type.length;
   unsigned num_quads = length / 4;
   /* per-quad rho when the lod vector is narrower than the coord vector */
   bool rho_per_quad = rho_bld->type.length != length;
   bool no_rho_opt = bld->no_rho_approx && (dims > 1);
   LLVMValueRef i32undef = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
   LLVMValueRef rho_xvec, rho_yvec;

   /* Note that all simplified calculations will only work for isotropic
    * filtering
    */

   /*
    * rho calcs are always per quad except for explicit derivs (excluding
    * the messy cube maps for now) when requested.
    */

   /* Size of the mip level we sample from, converted to float per dim. */
   LLVMValueRef int_size =
      lp_build_minify(int_size_bld, bld->int_size, first_level, true);
   LLVMValueRef float_size = lp_build_int_to_float(float_size_bld, int_size);

   if (derivs) {
      /* Explicit derivatives supplied by the shader. */
      LLVMValueRef ddmax[3] = { NULL }, ddx[3] = { NULL }, ddy[3] = { NULL };
      for (unsigned i = 0; i < dims; i++) {
         LLVMValueRef indexi = lp_build_const_int32(gallivm, i);

         /* broadcast this dimension's size over the whole coord vector */
         LLVMValueRef floatdim =
            lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                       coord_bld->type, float_size, indexi);

         /*
          * note that for rho_per_quad case could reduce math (at some shuffle
          * cost), but for now use same code to per-pixel lod case.
          */
         if (no_rho_opt) {
            /* exact: accumulate (size * d)^2 per direction */
            ddx[i] = lp_build_mul(coord_bld, floatdim, derivs->ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, floatdim, derivs->ddy[i]);
            ddx[i] = lp_build_mul(coord_bld, ddx[i], ddx[i]);
            ddy[i] = lp_build_mul(coord_bld, ddy[i], ddy[i]);
         } else {
            /* approximation: size * max(|ddx|, |ddy|) per dimension */
            LLVMValueRef tmpx = lp_build_abs(coord_bld, derivs->ddx[i]);
            LLVMValueRef tmpy = lp_build_abs(coord_bld, derivs->ddy[i]);
            ddmax[i] = lp_build_max(coord_bld, tmpx, tmpy);
            ddmax[i] = lp_build_mul(coord_bld, floatdim, ddmax[i]);
         }
      }
      if (no_rho_opt) {
         /* rho = max over x/y of the summed squared gradient lengths */
         rho_xvec = lp_build_add(coord_bld, ddx[0], ddx[1]);
         rho_yvec = lp_build_add(coord_bld, ddy[0], ddy[1]);
         if (dims > 2) {
            rho_xvec = lp_build_add(coord_bld, rho_xvec, ddx[2]);
            rho_yvec = lp_build_add(coord_bld, rho_yvec, ddy[2]);
         }
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);
         /* skipping sqrt hence returning rho squared */
      } else {
         rho = ddmax[0];
         if (dims > 1) {
            rho = lp_build_max(coord_bld, rho, ddmax[1]);
            if (dims > 2) {
               rho = lp_build_max(coord_bld, rho, ddmax[2]);
            }
         }
      }

      /* flush inf/nan derivatives to a zero rho */
      LLVMValueRef rho_is_inf = lp_build_is_inf_or_nan(gallivm,
                                                       coord_bld->type, rho);
      rho = lp_build_select(coord_bld, rho_is_inf, coord_bld->zero, rho);

      if (rho_per_quad) {
         /*
          * rho_vec contains per-pixel rho, convert to scalar per quad.
          */
         rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                         rho_bld->type, rho, 0);
      }
   } else {
      /*
       * Implicit derivatives, computed from the quad layout of the coords.
       *
       * This looks all a bit complex, but it's not that bad
       * (the shuffle code makes it look worse than it is).
       * Still, might not be ideal for all cases.
       */
      static const unsigned char swizzle0[] = { /* no-op swizzle */
         0, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle1[] = {
         1, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };
      static const unsigned char swizzle2[] = {
         2, LP_BLD_SWIZZLE_DONTCARE,
         LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
      };

      /* ddx_ddy[0] packs s (and t) derivatives; ddx_ddy[1] packs r's */
      if (dims < 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_onecoord(coord_bld, s);
      } else if (dims >= 2) {
         ddx_ddy[0] = lp_build_packed_ddx_ddy_twocoord(coord_bld, s, t);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_packed_ddx_ddy_onecoord(coord_bld, r);
         }
      }

      if (no_rho_opt) {
         static const unsigned char swizzle01[] = { /* no-op swizzle */
            0, 1,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         static const unsigned char swizzle23[] = {
            2, 3,
            LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
         };
         LLVMValueRef ddx_ddys, ddx_ddyt, floatdim;
         LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4];

         /* spread (width, height) into the packed derivative layout */
         for (unsigned i = 0; i < num_quads; i++) {
            shuffles[i*4+0] = shuffles[i*4+1] = index0;
            shuffles[i*4+2] = shuffles[i*4+3] = index1;
         }
         floatdim = LLVMBuildShuffleVector(builder, float_size, float_size,
                                           LLVMConstVector(shuffles, length),
                                           "");
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], floatdim);
         ddx_ddy[0] = lp_build_mul(coord_bld, ddx_ddy[0], ddx_ddy[0]);
         ddx_ddys = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle01);
         ddx_ddyt = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle23);
         rho_vec = lp_build_add(coord_bld, ddx_ddys, ddx_ddyt);

         if (dims > 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            floatdim = lp_build_extract_broadcast(gallivm, bld->float_size_in_type,
                                                  coord_bld->type, float_size, index2);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], floatdim);
            ddx_ddy[1] = lp_build_mul(coord_bld, ddx_ddy[1], ddx_ddy[1]);
            ddx_ddy[1] = lp_build_swizzle_aos(coord_bld, ddx_ddy[1], swizzle02);
            rho_vec = lp_build_add(coord_bld, rho_vec, ddx_ddy[1]);
         }

         rho_xvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
         rho_yvec = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);
         rho = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (rho_per_quad) {
            rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                            rho_bld->type, rho, 0);
         } else {
            rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
         }
         /* skipping sqrt hence returning rho squared */
      } else {
         /* approximate path: work with |d| and take maxima */
         ddx_ddy[0] = lp_build_abs(coord_bld, ddx_ddy[0]);
         if (dims > 2) {
            ddx_ddy[1] = lp_build_abs(coord_bld, ddx_ddy[1]);
         } else {
            ddx_ddy[1] = NULL; /* silence compiler warning */
         }

         /* separate the packed derivatives into x- and y-direction vectors */
         if (dims < 2) {
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle0);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle2);
         } else if (dims == 2) {
            static const unsigned char swizzle02[] = {
               0, 2,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            static const unsigned char swizzle13[] = {
               1, 3,
               LP_BLD_SWIZZLE_DONTCARE, LP_BLD_SWIZZLE_DONTCARE
            };
            rho_xvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle02);
            rho_yvec = lp_build_swizzle_aos(coord_bld, ddx_ddy[0], swizzle13);
         } else {
            /* 3D: gather s/t derivs from ddx_ddy[0] and r derivs from [1] */
            LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH];
            LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH];
            assert(dims == 3);
            for (unsigned i = 0; i < num_quads; i++) {
               shuffles1[4*i + 0] = lp_build_const_int32(gallivm, 4*i);
               shuffles1[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 2);
               shuffles1[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i);
               shuffles1[4*i + 3] = i32undef;
               shuffles2[4*i + 0] = lp_build_const_int32(gallivm, 4*i + 1);
               shuffles2[4*i + 1] = lp_build_const_int32(gallivm, 4*i + 3);
               shuffles2[4*i + 2] = lp_build_const_int32(gallivm, length + 4*i + 2);
               shuffles2[4*i + 3] = i32undef;
            }
            rho_xvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles1, length), "");
            rho_yvec = LLVMBuildShuffleVector(builder, ddx_ddy[0], ddx_ddy[1],
                                              LLVMConstVector(shuffles2, length), "");
         }

         rho_vec = lp_build_max(coord_bld, rho_xvec, rho_yvec);

         if (bld->coord_type.length > 4) {
            /* expand size to each quad */
            if (dims > 1) {
               /* could use some broadcast_vector helper for this? */
               LLVMValueRef src[LP_MAX_VECTOR_LENGTH/4];
               for (unsigned i = 0; i < num_quads; i++) {
                  src[i] = float_size;
               }
               float_size = lp_build_concat(bld->gallivm, src,
                                            float_size_bld->type, num_quads);
            } else {
               float_size = lp_build_broadcast_scalar(coord_bld, float_size);
            }
            rho_vec = lp_build_mul(coord_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  /* reduce the per-dimension rho to a single max */
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle0);
                  rho_t = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle1);

                  rho = lp_build_max(coord_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = lp_build_swizzle_aos(coord_bld, rho_vec, swizzle2);
                     rho = lp_build_max(coord_bld, rho, rho_r);
                  }
               }
            }
            if (rho_per_quad) {
               rho = lp_build_pack_aos_scalars(bld->gallivm, coord_bld->type,
                                               rho_bld->type, rho, 0);
            } else {
               rho = lp_build_swizzle_scalar_aos(coord_bld, rho, 0, 4);
            }
         } else {
            /* coord vector is a single quad: scalar reduction path */
            if (dims <= 1) {
               rho_vec = LLVMBuildExtractElement(builder, rho_vec, index0, "");
            }
            rho_vec = lp_build_mul(float_size_bld, rho_vec, float_size);

            if (dims <= 1) {
               rho = rho_vec;
            } else {
               if (dims >= 2) {
                  LLVMValueRef rho_s, rho_t, rho_r;

                  rho_s = LLVMBuildExtractElement(builder, rho_vec, index0, "");
                  rho_t = LLVMBuildExtractElement(builder, rho_vec, index1, "");

                  rho = lp_build_max(float_bld, rho_s, rho_t);

                  if (dims >= 3) {
                     rho_r = LLVMBuildExtractElement(builder, rho_vec, index2, "");
                     rho = lp_build_max(float_bld, rho, rho_r);
                  }
               }
            }
            if (!rho_per_quad) {
               rho = lp_build_broadcast_scalar(rho_bld, rho);
            }
         }
      }
   }

   return rho;
}
645
646
/*
 * Bri-linear lod computation
 *
 * Use a piece-wise linear approximation of log2 such that:
 * - round to nearest, for values in the neighborhood of -1, 0, 1, 2, etc.
 * - linear approximation for values in the neighborhood of 0.5, 1.5., etc,
 *   with the steepness specified in 'factor'
 * - exact result for 0.5, 1.5, etc.
 *
 *
 *   1.0 -              /----*
 *                     /
 *                    /
 *                   /
 *   0.5 -          *
 *                 /
 *                /
 *               /
 *   0.0 - *----/
 *
 *         |                 |
 *        2^0               2^1
 *
 * This is a technique also commonly used in hardware:
 * - http://ixbtlabs.com/articles2/gffx/nv40-rx800-3.html
 *
 * TODO: For correctness, this should only be applied when texture is known to
 * have regular mipmaps, i.e., mipmaps derived from the base level.
 *
 * TODO: This could be done in fixed point, where applicable.
 */
/**
 * \param lod            lod value (vector, bld->type)
 * \param factor         brilinear steepness factor (greater than one)
 * \param out_lod_ipart  receives the integer (floor) part of the lod
 * \param out_lod_fpart  receives the adjusted fractional part (never > 1,
 *                       may be negative)
 */
static void
lp_build_brilinear_lod(struct lp_build_context *bld,
                       LLVMValueRef lod,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   LLVMValueRef lod_fpart;
   /* pre/post offsets shift the lod so the piece-wise segments line up
    * with the exact values at 0.5, 1.5, etc. */
   double pre_offset = (factor - 0.5)/factor - 0.5;
   double post_offset = 1 - factor;

   /* debug output, disabled */
   if (0) {
      lp_build_printf(bld->gallivm, "lod = %f\n", lod);
   }

   lod = lp_build_add(bld, lod,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_offset));

   /* split into integer and fractional parts */
   lp_build_ifloor_fract(bld, lod, out_lod_ipart, &lod_fpart);

   /* remap the fractional part onto the steeper linear segment */
   lod_fpart = lp_build_mad(bld, lod_fpart,
                            lp_build_const_vec(bld->gallivm, bld->type, factor),
                            lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * It's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_fpart = lod_fpart;

   /* debug output, disabled */
   if (0) {
      lp_build_printf(bld->gallivm, "lod_ipart = %i\n", *out_lod_ipart);
      lp_build_printf(bld->gallivm, "lod_fpart = %f\n\n", *out_lod_fpart);
   }
}
715
716
/*
 * Combined log2 and brilinear lod computation.
 *
 * It's in all identical to calling lp_build_fast_log2() and
 * lp_build_brilinear_lod() above, but by combining we can compute the integer
 * and fractional part independently.
 */
/**
 * \param rho            rho value (vector, bld->type, not squared)
 * \param factor         brilinear steepness factor (greater than one)
 * \param out_lod_ipart  receives the integer part of the lod
 * \param out_lod_fpart  receives the adjusted fractional part
 */
static void
lp_build_brilinear_rho(struct lp_build_context *bld,
                       LLVMValueRef rho,
                       double factor,
                       LLVMValueRef *out_lod_ipart,
                       LLVMValueRef *out_lod_fpart)
{
   const double pre_factor = (2*factor - 0.5)/(M_SQRT2*factor);
   const double post_offset = 1 - 2*factor;

   assert(bld->type.floating);

   assert(lp_check_value(bld->type, rho));

   /*
    * The pre factor will make the intersections with the exact powers of two
    * happen precisely where we want them to be, which means that the integer
    * part will not need any post adjustments.
    */
   rho = lp_build_mul(bld, rho,
                      lp_build_const_vec(bld->gallivm, bld->type, pre_factor));

   /* ipart = ifloor(log2(rho)) */
   LLVMValueRef lod_ipart = lp_build_extract_exponent(bld, rho, 0);

   /* fpart = rho / 2**ipart */
   LLVMValueRef lod_fpart = lp_build_extract_mantissa(bld, rho);

   /* remap the mantissa onto the brilinear segment */
   lod_fpart =
      lp_build_mad(bld, lod_fpart,
                   lp_build_const_vec(bld->gallivm, bld->type, factor),
                   lp_build_const_vec(bld->gallivm, bld->type, post_offset));

   /*
    * Like lp_build_brilinear_lod, it's not necessary to clamp lod_fpart since:
    * - the above expression will never produce numbers greater than one.
    * - the mip filtering branch is only taken if lod_fpart is positive
    */

   *out_lod_ipart = lod_ipart;
   *out_lod_fpart = lod_fpart;
}
766
767
768 /**
769 * Fast implementation of iround(log2(sqrt(x))), based on
770 * log2(x^n) == n*log2(x).
771 *
772 * Gives accurate results all the time.
773 * (Could be trivially extended to handle other power-of-two roots.)
774 */
775 static LLVMValueRef
lp_build_ilog2_sqrt(struct lp_build_context * bld,LLVMValueRef x)776 lp_build_ilog2_sqrt(struct lp_build_context *bld,
777 LLVMValueRef x)
778 {
779 LLVMBuilderRef builder = bld->gallivm->builder;
780 struct lp_type i_type = lp_int_type(bld->type);
781 LLVMValueRef one = lp_build_const_int_vec(bld->gallivm, i_type, 1);
782
783 assert(bld->type.floating);
784
785 assert(lp_check_value(bld->type, x));
786
787 /* ipart = log2(x) + 0.5 = 0.5*(log2(x^2) + 1.0) */
788 LLVMValueRef ipart = lp_build_extract_exponent(bld, x, 1);
789 ipart = LLVMBuildAShr(builder, ipart, one, "");
790
791 return ipart;
792 }
793
794
795 /**
796 * Generate code to compute texture level of detail (lambda).
797 * \param derivs partial derivatives of (s, t, r, q) with respect to X and Y
798 * \param lod_bias optional float vector with the shader lod bias
799 * \param explicit_lod optional float vector with the explicit lod
800 * \param out_lod_ipart integer part of lod
801 * \param out_lod_fpart float part of lod (never larger than 1 but may be negative)
802 * \param out_lod_positive (mask) if lod is positive (i.e. texture is minified)
803 *
804 * The resulting lod can be scalar per quad or be per element.
805 */
806 void
lp_build_lod_selector(struct lp_build_sample_context * bld,bool is_lodq,unsigned sampler_unit,LLVMValueRef first_level,LLVMValueRef s,LLVMValueRef t,LLVMValueRef r,const struct lp_derivatives * derivs,LLVMValueRef lod_bias,LLVMValueRef explicit_lod,enum pipe_tex_mipfilter mip_filter,LLVMValueRef max_aniso,LLVMValueRef * out_lod,LLVMValueRef * out_lod_ipart,LLVMValueRef * out_lod_fpart,LLVMValueRef * out_lod_positive)807 lp_build_lod_selector(struct lp_build_sample_context *bld,
808 bool is_lodq,
809 unsigned sampler_unit,
810 LLVMValueRef first_level,
811 LLVMValueRef s,
812 LLVMValueRef t,
813 LLVMValueRef r,
814 const struct lp_derivatives *derivs,
815 LLVMValueRef lod_bias, /* optional */
816 LLVMValueRef explicit_lod, /* optional */
817 enum pipe_tex_mipfilter mip_filter,
818 LLVMValueRef max_aniso,
819 LLVMValueRef *out_lod,
820 LLVMValueRef *out_lod_ipart,
821 LLVMValueRef *out_lod_fpart,
822 LLVMValueRef *out_lod_positive)
823
824 {
825 LLVMBuilderRef builder = bld->gallivm->builder;
826 struct lp_sampler_dynamic_state *dynamic_state = bld->dynamic_state;
827 struct lp_build_context *lodf_bld = &bld->lodf_bld;
828 LLVMValueRef lod;
829
830 *out_lod_ipart = bld->lodi_bld.zero;
831 *out_lod_positive = bld->lodi_bld.zero;
832 *out_lod_fpart = lodf_bld->zero;
833
834 /*
835 * For determining min/mag, we follow GL 4.1 spec, 3.9.12 Texture
836 * Magnification: "Implementations may either unconditionally assume c = 0
837 * for the minification vs. magnification switch-over point, or may choose
838 * to make c depend on the combination of minification and magnification
839 * modes as follows: if the magnification filter is given by LINEAR and the
840 * minification filter is given by NEAREST_MIPMAP_NEAREST or
841 * NEAREST_MIPMAP_LINEAR, then c = 0.5. This is done to ensure that a
842 * minified texture does not appear "sharper" than a magnified
843 * texture. Otherwise c = 0." And 3.9.11 Texture Minification: "If lod is
844 * less than or equal to the constant c (see section 3.9.12) the texture is
845 * said to be magnified; if it is greater, the texture is minified." So,
846 * using 0 as switchover point always, and using magnification for lod ==
847 * 0. Note that the always c = 0 behavior is new (first appearing in GL
848 * 3.1 spec), old GL versions required 0.5 for the modes listed above. I
849 * have no clue about the (undocumented) wishes of d3d9/d3d10 here!
850 */
851
852 if (bld->static_sampler_state->min_max_lod_equal && !is_lodq) {
853 /* User is forcing sampling from a particular mipmap level.
854 * This is hit during mipmap generation.
855 */
856 LLVMValueRef min_lod =
857 dynamic_state->min_lod(bld->gallivm, bld->resources_type,
858 bld->resources_ptr, sampler_unit);
859
860 lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
861 } else {
862 if (explicit_lod) {
863 if (bld->num_lods != bld->coord_type.length)
864 lod = lp_build_pack_aos_scalars(bld->gallivm, bld->coord_bld.type,
865 lodf_bld->type, explicit_lod, 0);
866 else
867 lod = explicit_lod;
868 } else {
869 LLVMValueRef rho;
870 bool rho_squared = bld->no_rho_approx && (bld->dims > 1);
871
872 if (bld->static_sampler_state->aniso &&
873 !explicit_lod) {
874 rho = lp_build_pmin(bld, first_level, s, t, max_aniso);
875 rho_squared = true;
876 } else {
877 rho = lp_build_rho(bld, first_level, s, t, r, derivs);
878 }
879
880 /*
881 * Compute lod = log2(rho)
882 */
883
884 if (!lod_bias && !is_lodq &&
885 !bld->static_sampler_state->aniso &&
886 !bld->static_sampler_state->lod_bias_non_zero &&
887 !bld->static_sampler_state->apply_max_lod &&
888 !bld->static_sampler_state->apply_min_lod) {
889 /*
890 * Special case when there are no post-log2 adjustments, which
891 * saves instructions but keeping the integer and fractional lod
892 * computations separate from the start.
893 */
894
895 if (mip_filter == PIPE_TEX_MIPFILTER_NONE ||
896 mip_filter == PIPE_TEX_MIPFILTER_NEAREST) {
897 /*
898 * Don't actually need both values all the time, lod_ipart is
899 * needed for nearest mipfilter, lod_positive if min != mag.
900 */
901 if (rho_squared) {
902 *out_lod_ipart = lp_build_ilog2_sqrt(lodf_bld, rho);
903 } else {
904 *out_lod_ipart = lp_build_ilog2(lodf_bld, rho);
905 }
906 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
907 rho, lodf_bld->one);
908 return;
909 }
910 if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR &&
911 !bld->no_brilinear && !rho_squared &&
912 !bld->static_sampler_state->aniso) {
913 /*
914 * This can't work if rho is squared. Not sure if it could be
               * fixed while keeping it worthwhile, could also do sqrt here
916 * but brilinear and no_rho_opt seems like a combination not
917 * making much sense anyway so just use ordinary path below.
918 */
919 lp_build_brilinear_rho(lodf_bld, rho, BRILINEAR_FACTOR,
920 out_lod_ipart, out_lod_fpart);
921 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
922 rho, lodf_bld->one);
923 return;
924 }
925 }
926
927 if (0) {
928 lod = lp_build_log2(lodf_bld, rho);
929 } else {
            /* get more accurate results if we just square rho always */
931 if (!rho_squared)
932 rho = lp_build_mul(lodf_bld, rho, rho);
933 lod = lp_build_fast_log2(lodf_bld, rho);
934 }
935
936 /* log2(x^2) == 0.5*log2(x) */
937 lod = lp_build_mul(lodf_bld, lod,
938 lp_build_const_vec(bld->gallivm,
939 lodf_bld->type, 0.5F));
940
941 /* add shader lod bias */
942 if (lod_bias) {
943 if (bld->num_lods != bld->coord_type.length)
944 lod_bias = lp_build_pack_aos_scalars(bld->gallivm,
945 bld->coord_bld.type,
946 lodf_bld->type,
947 lod_bias, 0);
948 lod = LLVMBuildFAdd(builder, lod, lod_bias, "shader_lod_bias");
949 }
950 }
951
952 /* add sampler lod bias */
953 if (bld->static_sampler_state->lod_bias_non_zero) {
954 LLVMValueRef sampler_lod_bias =
955 dynamic_state->lod_bias(bld->gallivm, bld->resources_type,
956 bld->resources_ptr, sampler_unit);
957 sampler_lod_bias = lp_build_broadcast_scalar(lodf_bld,
958 sampler_lod_bias);
959 lod = LLVMBuildFAdd(builder, lod, sampler_lod_bias, "sampler_lod_bias");
960 }
961
962 if (is_lodq) {
963 *out_lod = lod;
964 }
965
966 /* clamp lod */
967 if (bld->static_sampler_state->apply_max_lod) {
968 LLVMValueRef max_lod =
969 dynamic_state->max_lod(bld->gallivm, bld->resources_type,
970 bld->resources_ptr, sampler_unit);
971 max_lod = lp_build_broadcast_scalar(lodf_bld, max_lod);
972
973 lod = lp_build_min(lodf_bld, lod, max_lod);
974 }
975 if (bld->static_sampler_state->apply_min_lod) {
976 LLVMValueRef min_lod =
977 dynamic_state->min_lod(bld->gallivm, bld->resources_type,
978 bld->resources_ptr, sampler_unit);
979 min_lod = lp_build_broadcast_scalar(lodf_bld, min_lod);
980
981 lod = lp_build_max(lodf_bld, lod, min_lod);
982 }
983
984 if (is_lodq) {
985 *out_lod_fpart = lod;
986 return;
987 }
988 }
989
990 *out_lod_positive = lp_build_cmp(lodf_bld, PIPE_FUNC_GREATER,
991 lod, lodf_bld->zero);
992
993 if (bld->static_sampler_state->aniso) {
994 *out_lod_ipart = lp_build_itrunc(lodf_bld, lod);
995 } else if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
996 if (!bld->no_brilinear) {
997 lp_build_brilinear_lod(lodf_bld, lod, BRILINEAR_FACTOR,
998 out_lod_ipart, out_lod_fpart);
999 } else {
1000 lp_build_ifloor_fract(lodf_bld, lod, out_lod_ipart, out_lod_fpart);
1001 }
1002
1003 lp_build_name(*out_lod_fpart, "lod_fpart");
1004 } else {
1005 *out_lod_ipart = lp_build_iround(lodf_bld, lod);
1006 }
1007
1008 lp_build_name(*out_lod_ipart, "lod_ipart");
1009
1010 return;
1011 }
1012
1013
1014 /**
1015 * For PIPE_TEX_MIPFILTER_NEAREST, convert int part of lod
1016 * to actual mip level.
1017 * Note: this is all scalar per quad code.
1018 * \param lod_ipart int texture level of detail
1019 * \param level_out returns integer
1020 * \param out_of_bounds returns per coord out_of_bounds mask if provided
1021 */
1022 void
lp_build_nearest_mip_level(struct lp_build_sample_context * bld,LLVMValueRef first_level,LLVMValueRef last_level,LLVMValueRef lod_ipart,LLVMValueRef * level_out,LLVMValueRef * out_of_bounds)1023 lp_build_nearest_mip_level(struct lp_build_sample_context *bld,
1024 LLVMValueRef first_level,
1025 LLVMValueRef last_level,
1026 LLVMValueRef lod_ipart,
1027 LLVMValueRef *level_out,
1028 LLVMValueRef *out_of_bounds)
1029 {
1030 struct lp_build_context *leveli_bld = &bld->leveli_bld;
1031 LLVMValueRef level = lp_build_add(leveli_bld, lod_ipart, first_level);
1032
1033 if (out_of_bounds) {
1034 LLVMValueRef out, out1;
1035 out = lp_build_cmp(leveli_bld, PIPE_FUNC_LESS, level, first_level);
1036 out1 = lp_build_cmp(leveli_bld, PIPE_FUNC_GREATER, level, last_level);
1037 out = lp_build_or(leveli_bld, out, out1);
1038 if (bld->num_mips == bld->coord_bld.type.length) {
1039 *out_of_bounds = out;
1040 } else if (bld->num_mips == 1) {
1041 *out_of_bounds = lp_build_broadcast_scalar(&bld->int_coord_bld, out);
1042 } else {
1043 assert(bld->num_mips == bld->coord_bld.type.length / 4);
1044 *out_of_bounds =
1045 lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1046 leveli_bld->type,
1047 bld->int_coord_bld.type,
1048 out);
1049 }
1050 level = lp_build_andnot(&bld->int_coord_bld, level, *out_of_bounds);
1051 *level_out = level;
1052 } else {
1053 /* clamp level to legal range of levels */
1054 *level_out = lp_build_clamp(leveli_bld, level, first_level, last_level);
1055
1056 }
1057 }
1058
1059
1060 /**
1061 * For PIPE_TEX_MIPFILTER_LINEAR, convert per-quad (or per element) int LOD(s)
1062 * to two (per-quad) (adjacent) mipmap level indexes, and fix up float lod
1063 * part accordingly.
1064 * Later, we'll sample from those two mipmap levels and interpolate between
1065 * them.
1066 */
1067 void
lp_build_linear_mip_levels(struct lp_build_sample_context * bld,unsigned texture_unit,LLVMValueRef first_level,LLVMValueRef last_level,LLVMValueRef lod_ipart,LLVMValueRef * lod_fpart_inout,LLVMValueRef * level0_out,LLVMValueRef * level1_out)1068 lp_build_linear_mip_levels(struct lp_build_sample_context *bld,
1069 unsigned texture_unit,
1070 LLVMValueRef first_level,
1071 LLVMValueRef last_level,
1072 LLVMValueRef lod_ipart,
1073 LLVMValueRef *lod_fpart_inout,
1074 LLVMValueRef *level0_out,
1075 LLVMValueRef *level1_out)
1076 {
1077 LLVMBuilderRef builder = bld->gallivm->builder;
1078 struct lp_build_context *leveli_bld = &bld->leveli_bld;
1079 struct lp_build_context *levelf_bld = &bld->levelf_bld;
1080 LLVMValueRef clamp_min;
1081 LLVMValueRef clamp_max;
1082
1083 assert(bld->num_lods == bld->num_mips);
1084
1085 *level0_out = lp_build_add(leveli_bld, lod_ipart, first_level);
1086 *level1_out = lp_build_add(leveli_bld, *level0_out, leveli_bld->one);
1087
1088 /*
1089 * Clamp both *level0_out and *level1_out to [first_level, last_level],
1090 * with the minimum number of comparisons, and zeroing lod_fpart in the
1091 * extreme ends in the process.
1092 */
1093
1094 /* *level0_out < first_level */
1095 clamp_min = LLVMBuildICmp(builder, LLVMIntSLT,
1096 *level0_out, first_level,
1097 "clamp_lod_to_first");
1098
1099 *level0_out = LLVMBuildSelect(builder, clamp_min,
1100 first_level, *level0_out, "");
1101
1102 *level1_out = LLVMBuildSelect(builder, clamp_min,
1103 first_level, *level1_out, "");
1104
1105 *lod_fpart_inout = LLVMBuildSelect(builder, clamp_min,
1106 levelf_bld->zero, *lod_fpart_inout, "");
1107
1108 /* *level0_out >= last_level */
1109 clamp_max = LLVMBuildICmp(builder, LLVMIntSGE,
1110 *level0_out, last_level,
1111 "clamp_lod_to_last");
1112
1113 *level0_out = LLVMBuildSelect(builder, clamp_max,
1114 last_level, *level0_out, "");
1115
1116 *level1_out = LLVMBuildSelect(builder, clamp_max,
1117 last_level, *level1_out, "");
1118
1119 *lod_fpart_inout = LLVMBuildSelect(builder, clamp_max,
1120 levelf_bld->zero, *lod_fpart_inout, "");
1121
1122 lp_build_name(*level0_out, "texture%u_miplevel0", texture_unit);
1123 lp_build_name(*level1_out, "texture%u_miplevel1", texture_unit);
1124 lp_build_name(*lod_fpart_inout, "texture%u_mipweight", texture_unit);
1125 }
1126
1127
1128 /**
1129 * A helper function that factorizes this common pattern.
1130 */
1131 LLVMValueRef
lp_sample_load_mip_value(struct gallivm_state * gallivm,LLVMTypeRef ptr_type,LLVMValueRef offsets,LLVMValueRef index1)1132 lp_sample_load_mip_value(struct gallivm_state *gallivm,
1133 LLVMTypeRef ptr_type,
1134 LLVMValueRef offsets,
1135 LLVMValueRef index1)
1136 {
1137 LLVMValueRef zero = lp_build_const_int32(gallivm, 0);
1138 LLVMValueRef indexes[2] = {zero, index1};
1139 LLVMValueRef ptr = LLVMBuildGEP2(gallivm->builder, ptr_type, offsets,
1140 indexes, ARRAY_SIZE(indexes), "");
1141 return LLVMBuildLoad2(gallivm->builder,
1142 LLVMInt32TypeInContext(gallivm->context), ptr, "");
1143 }
1144
1145
1146 /**
1147 * Return pointer to a single mipmap level.
1148 * \param level integer mipmap level
1149 */
1150 LLVMValueRef
lp_build_get_mipmap_level(struct lp_build_sample_context * bld,LLVMValueRef level)1151 lp_build_get_mipmap_level(struct lp_build_sample_context *bld,
1152 LLVMValueRef level)
1153 {
1154 LLVMValueRef mip_offset = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
1155 bld->mip_offsets, level);
1156 LLVMBuilderRef builder = bld->gallivm->builder;
1157 LLVMValueRef data_ptr =
1158 LLVMBuildGEP2(builder,
1159 LLVMInt8TypeInContext(bld->gallivm->context),
1160 bld->base_ptr, &mip_offset, 1, "");
1161 return data_ptr;
1162 }
1163
1164
1165 /**
1166 * Return (per-pixel) offsets to mip levels.
1167 * \param level integer mipmap level
1168 */
1169 LLVMValueRef
lp_build_get_mip_offsets(struct lp_build_sample_context * bld,LLVMValueRef level)1170 lp_build_get_mip_offsets(struct lp_build_sample_context *bld,
1171 LLVMValueRef level)
1172 {
1173 LLVMBuilderRef builder = bld->gallivm->builder;
1174 LLVMValueRef offsets, offset1;
1175
1176 if (bld->num_mips == 1) {
1177 offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type, bld->mip_offsets, level);
1178 offsets = lp_build_broadcast_scalar(&bld->int_coord_bld, offset1);
1179 } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1180 offsets = bld->int_coord_bld.undef;
1181 for (unsigned i = 0; i < bld->num_mips; i++) {
1182 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1183 offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
1184 bld->mip_offsets,
1185 LLVMBuildExtractElement(builder, level,
1186 indexi, ""));
1187 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1188 offsets = LLVMBuildInsertElement(builder, offsets, offset1,
1189 indexo, "");
1190 }
1191 offsets = lp_build_swizzle_scalar_aos(&bld->int_coord_bld,
1192 offsets, 0, 4);
1193 } else {
1194 assert (bld->num_mips == bld->coord_bld.type.length);
1195
1196 offsets = bld->int_coord_bld.undef;
1197 for (unsigned i = 0; i < bld->num_mips; i++) {
1198 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1199 offset1 = lp_sample_load_mip_value(bld->gallivm, bld->mip_offsets_type,
1200 bld->mip_offsets,
1201 LLVMBuildExtractElement(builder, level,
1202 indexi, ""));
1203 offsets = LLVMBuildInsertElement(builder, offsets, offset1,
1204 indexi, "");
1205 }
1206 }
1207 return offsets;
1208 }
1209
1210
1211 /**
1212 * Codegen equivalent for u_minify().
1213 * @param lod_scalar if lod is a (broadcasted) scalar
1214 * Return max(1, base_size >> level);
1215 */
LLVMValueRef
lp_build_minify(struct lp_build_context *bld,
                LLVMValueRef base_size,
                LLVMValueRef level,
                bool lod_scalar)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   /* both operands must be vectors of bld's (signed integer) type */
   assert(lp_check_value(bld->type, base_size));
   assert(lp_check_value(bld->type, level));

   if (level == bld->zero) {
      /* if we're using mipmap level zero, no minification is needed */
      return base_size;
   } else {
      LLVMValueRef size;
      assert(bld->type.sign);
      if (lod_scalar ||
         (util_get_cpu_caps()->has_avx2 || !util_get_cpu_caps()->has_sse)) {
         /* straightforward path: size = max(1, base_size >> level) */
         size = LLVMBuildLShr(builder, base_size, level, "minify");
         size = lp_build_max(bld, size, bld->one);
      } else {
         /*
          * emulate shift with float mul, since intel "forgot" shifts with
          * per-element shift count until avx2, which results in terrible
          * scalar extraction (both count and value), scalar shift,
          * vector reinsertion. Should not be an issue on any non-x86 cpu
          * with a vector instruction set.
          * On cpus with AMD's XOP this should also be unnecessary but I'm
          * not sure if llvm would emit this with current flags.
          */
         LLVMValueRef const127, const23, lf;
         struct lp_type ftype;
         struct lp_build_context fbld;
         ftype = lp_type_float_vec(32, bld->type.length * bld->type.width);
         lp_build_context_init(&fbld, bld->gallivm, ftype);
         const127 = lp_build_const_int_vec(bld->gallivm, bld->type, 127);
         const23 = lp_build_const_int_vec(bld->gallivm, bld->type, 23);

         /* calculate 2^(-level) float by directly constructing the IEEE754
          * bit pattern: biased exponent (127 - level) at bit 23, zero
          * mantissa */
         lf = lp_build_sub(bld, const127, level);
         lf = lp_build_shl(bld, lf, const23);
         lf = LLVMBuildBitCast(builder, lf, fbld.vec_type, "");

         /* finish shift operation by doing float mul */
         base_size = lp_build_int_to_float(&fbld, base_size);
         size = lp_build_mul(&fbld, base_size, lf);
         /*
          * do the max also with floats because
          * a) non-emulated int max requires sse41
          *    (this is actually a lie as we could cast to 16bit values
          *    as 16bit is sufficient and 16bit int max is sse2)
          * b) with avx we can do int max 4-wide but float max 8-wide
          */
         size = lp_build_max(&fbld, size, fbld.one);
         size = lp_build_itrunc(&fbld, size);
      }
      return size;
   }
}
1275
1276
1277 /*
1278 * Scale image dimensions with block sizes.
1279 *
1280 * tex_blocksize is the resource format blocksize
1281 * view_blocksize is the view format blocksize
1282 *
1283 * This must be applied post-minification, but
1284 * only when blocksizes are different.
1285 *
1286 * ret = (size + (tex_blocksize - 1)) >> log2(tex_blocksize);
1287 * ret *= blocksize;
1288 */
1289 LLVMValueRef
lp_build_scale_view_dims(struct lp_build_context * bld,LLVMValueRef size,LLVMValueRef tex_blocksize,LLVMValueRef tex_blocksize_log2,LLVMValueRef view_blocksize)1290 lp_build_scale_view_dims(struct lp_build_context *bld, LLVMValueRef size,
1291 LLVMValueRef tex_blocksize,
1292 LLVMValueRef tex_blocksize_log2,
1293 LLVMValueRef view_blocksize)
1294 {
1295 LLVMBuilderRef builder = bld->gallivm->builder;
1296 LLVMValueRef ret =
1297 LLVMBuildAdd(builder, size,
1298 LLVMBuildSub(builder, tex_blocksize,
1299 lp_build_const_int_vec(bld->gallivm,
1300 bld->type, 1), ""),
1301 "");
1302 ret = LLVMBuildLShr(builder, ret, tex_blocksize_log2, "");
1303 ret = LLVMBuildMul(builder, ret, view_blocksize, "");
1304 return ret;
1305 }
1306
1307
1308 /*
1309 * Scale a single image dimension.
1310 *
1311 * Scale one image between resource and view blocksizes.
1312 * noop if sizes are the same.
1313 */
1314 LLVMValueRef
lp_build_scale_view_dim(struct gallivm_state * gallivm,LLVMValueRef size,unsigned tex_blocksize,unsigned view_blocksize)1315 lp_build_scale_view_dim(struct gallivm_state *gallivm, LLVMValueRef size,
1316 unsigned tex_blocksize, unsigned view_blocksize)
1317 {
1318 if (tex_blocksize == view_blocksize)
1319 return size;
1320
1321 LLVMBuilderRef builder = gallivm->builder;
1322 LLVMValueRef ret =
1323 LLVMBuildAdd(builder, size,
1324 lp_build_const_int32(gallivm, tex_blocksize - 1), "");
1325 ret = LLVMBuildLShr(builder, ret,
1326 lp_build_const_int32(gallivm,
1327 util_logbase2(tex_blocksize)), "");
1328 ret = LLVMBuildMul(builder, ret,
1329 lp_build_const_int32(gallivm, view_blocksize), "");
1330 return ret;
1331 }
1332
1333
1334 /**
1335 * Dereference stride_array[mipmap_level] array to get a stride.
1336 * Return stride as a vector.
1337 */
1338 static LLVMValueRef
lp_build_get_level_stride_vec(struct lp_build_sample_context * bld,LLVMTypeRef stride_type,LLVMValueRef stride_array,LLVMValueRef level)1339 lp_build_get_level_stride_vec(struct lp_build_sample_context *bld,
1340 LLVMTypeRef stride_type,
1341 LLVMValueRef stride_array, LLVMValueRef level)
1342 {
1343 LLVMBuilderRef builder = bld->gallivm->builder;
1344 LLVMValueRef stride, stride1;
1345
1346 if (bld->num_mips == 1) {
1347 stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array, level);
1348 stride = lp_build_broadcast_scalar(&bld->int_coord_bld, stride1);
1349 } else if (bld->num_mips == bld->coord_bld.type.length / 4) {
1350 LLVMValueRef stride1;
1351
1352 stride = bld->int_coord_bld.undef;
1353 for (unsigned i = 0; i < bld->num_mips; i++) {
1354 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1355 stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
1356 LLVMBuildExtractElement(builder, level,
1357 indexi, ""));
1358 LLVMValueRef indexo = lp_build_const_int32(bld->gallivm, 4 * i);
1359 stride = LLVMBuildInsertElement(builder, stride, stride1, indexo, "");
1360 }
1361 stride = lp_build_swizzle_scalar_aos(&bld->int_coord_bld, stride, 0, 4);
1362 } else {
1363 LLVMValueRef stride1;
1364
1365 assert (bld->num_mips == bld->coord_bld.type.length);
1366
1367 stride = bld->int_coord_bld.undef;
1368 for (unsigned i = 0; i < bld->coord_bld.type.length; i++) {
1369 LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
1370 stride1 = lp_sample_load_mip_value(bld->gallivm, stride_type, stride_array,
1371 LLVMBuildExtractElement(builder, level,
1372 indexi, ""));
1373 stride = LLVMBuildInsertElement(builder, stride, stride1, indexi, "");
1374 }
1375 }
1376 return stride;
1377 }
1378
1379
1380 /**
1381 * When sampling a mipmap, we need to compute the width, height, depth
1382 * of the source levels from the level indexes. This helper function
1383 * does that.
1384 */
void
lp_build_mipmap_level_sizes(struct lp_build_sample_context *bld,
                            LLVMValueRef ilevel,
                            LLVMValueRef *out_size,
                            LLVMValueRef *row_stride_vec,
                            LLVMValueRef *img_stride_vec)
{
   const unsigned dims = bld->dims;
   LLVMValueRef ilevel_vec;

   /*
    * Compute width, height, depth at mipmap level 'ilevel'
    */
   if (bld->num_mips == 1) {
      /* One level for all lanes: minify the size vector with a broadcast
       * level, then adjust for view/resource blocksize differences. */
      ilevel_vec = lp_build_broadcast_scalar(&bld->int_size_bld, ilevel);
      *out_size = lp_build_minify(&bld->int_size_bld, bld->int_size,
                                  ilevel_vec, true);
      *out_size = lp_build_scale_view_dims(&bld->int_size_bld, *out_size,
                                           bld->int_tex_blocksize,
                                           bld->int_tex_blocksize_log2,
                                           bld->int_view_blocksize);
   } else {
      LLVMValueRef int_size_vec;
      LLVMValueRef int_tex_blocksize_vec, int_tex_blocksize_log2_vec;
      LLVMValueRef int_view_blocksize_vec;
      LLVMValueRef tmp[LP_MAX_VECTOR_LENGTH];
      const unsigned num_quads = bld->coord_bld.type.length / 4;

      if (bld->num_mips == num_quads) {
         /*
          * XXX: this should be #ifndef SANE_INSTRUCTION_SET.
          * intel "forgot" the variable shift count instruction until avx2.
          * A harmless 8x32 shift gets translated into 32 instructions
          * (16 extracts, 8 scalar shifts, 8 inserts), llvm is apparently
          * unable to recognize if there are really just 2 different shift
          * count values. So do the shift 4-wide before expansion.
          */
         struct lp_build_context bld4;
         struct lp_type type4;

         /* 4-wide int context used for the per-quad minify. */
         type4 = bld->int_coord_bld.type;
         type4.length = 4;

         lp_build_context_init(&bld4, bld->gallivm, type4);

         if (bld->dims == 1) {
            /* 1D size is a scalar; splat it (and the blocksizes) 4-wide. */
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld4,
                                                     bld->int_size);
            int_tex_blocksize_vec =
               lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize);
            int_tex_blocksize_log2_vec =
               lp_build_broadcast_scalar(&bld4, bld->int_tex_blocksize_log2);
            int_view_blocksize_vec =
               lp_build_broadcast_scalar(&bld4, bld->int_view_blocksize);
         } else {
            /* 2D/3D sizes are already 4-wide [w, h, d, _] vectors. */
            assert(bld->int_size_in_bld.type.length == 4);
            int_size_vec = bld->int_size;
            int_tex_blocksize_vec = bld->int_tex_blocksize;
            int_tex_blocksize_log2_vec = bld->int_tex_blocksize_log2;
            int_view_blocksize_vec = bld->int_view_blocksize;
         }

         /* Minify per quad: extract that quad's level, splat it 4-wide. */
         for (unsigned i = 0; i < num_quads; i++) {
            LLVMValueRef ileveli;
            LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);

            ileveli = lp_build_extract_broadcast(bld->gallivm,
                                                 bld->leveli_bld.type,
                                                 bld4.type,
                                                 ilevel,
                                                 indexi);
            tmp[i] = lp_build_minify(&bld4, int_size_vec, ileveli, true);
            tmp[i] = lp_build_scale_view_dims(&bld4, tmp[i],
                                              int_tex_blocksize_vec,
                                              int_tex_blocksize_log2_vec,
                                              int_view_blocksize_vec);
         }
         /*
          * out_size is [w0, h0, d0, _, w1, h1, d1, _, ...] vector for
          * dims > 1, [w0, w0, w0, w0, w1, w1, w1, w1, ...] otherwise.
          */
         *out_size = lp_build_concat(bld->gallivm,
                                     tmp,
                                     bld4.type,
                                     num_quads);
      } else {
         /* FIXME: this is terrible and results in _huge_ vector
          * (for the dims > 1 case).
          * Should refactor this (together with extract_image_sizes) and do
          * something more useful. Could for instance if we have width,height
          * with 4-wide vector pack all elements into a 8xi16 vector
          * (on which we can still do useful math) instead of using a 16xi32
          * vector.
          * For dims == 1 this will create [w0, w1, w2, w3, ...] vector.
          * For dims > 1 this will create [w0, h0, d0, _, w1, h1, d1, _, ...]
          * vector.
          */
         assert(bld->num_mips == bld->coord_bld.type.length);
         if (bld->dims == 1) {
            /* Per-element levels, 1D: minify the splatted width with the
             * full per-element level vector (non-scalar lod). */
            assert(bld->int_size_in_bld.type.length == 1);
            int_size_vec = lp_build_broadcast_scalar(&bld->int_coord_bld,
                                                     bld->int_size);
            int_tex_blocksize_vec =
               lp_build_broadcast_scalar(&bld->int_coord_bld,
                                         bld->int_tex_blocksize);
            int_tex_blocksize_log2_vec =
               lp_build_broadcast_scalar(&bld->int_coord_bld,
                                         bld->int_tex_blocksize_log2);
            int_view_blocksize_vec =
               lp_build_broadcast_scalar(&bld->int_coord_bld,
                                         bld->int_view_blocksize);
            *out_size = lp_build_minify(&bld->int_coord_bld, int_size_vec,
                                        ilevel, false);
            *out_size = lp_build_scale_view_dims(&bld->int_coord_bld,
                                                 *out_size,
                                                 int_tex_blocksize_vec,
                                                 int_tex_blocksize_log2_vec,
                                                 int_view_blocksize_vec);
         } else {
            /* Per-element levels, dims > 1: minify the 4-wide size once
             * per element, then concatenate all the results. */
            LLVMValueRef ilevel1;
            for (unsigned i = 0; i < bld->num_mips; i++) {
               LLVMValueRef indexi = lp_build_const_int32(bld->gallivm, i);
               ilevel1 = lp_build_extract_broadcast(bld->gallivm,
                                                    bld->int_coord_type,
                                                    bld->int_size_in_bld.type,
                                                    ilevel, indexi);
               tmp[i] = bld->int_size;
               tmp[i] = lp_build_minify(&bld->int_size_in_bld, tmp[i],
                                        ilevel1, true);
               tmp[i] = lp_build_scale_view_dims(&bld->int_size_in_bld,
                                                 tmp[i],
                                                 bld->int_tex_blocksize,
                                                 bld->int_tex_blocksize_log2,
                                                 bld->int_view_blocksize);
            }
            *out_size = lp_build_concat(bld->gallivm, tmp,
                                        bld->int_size_in_bld.type,
                                        bld->num_mips);
         }
      }
   }

   /* Row stride only exists for 2D and higher. */
   if (dims >= 2) {
      *row_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->row_stride_type,
                                                      bld->row_stride_array,
                                                      ilevel);
   }
   /* Image stride only exists for 3D or layered (array/cube) targets. */
   if (dims == 3 || has_layer_coord(bld->static_texture_state->target)) {
      *img_stride_vec = lp_build_get_level_stride_vec(bld,
                                                      bld->img_stride_type,
                                                      bld->img_stride_array,
                                                      ilevel);
   }
}
1541
1542
1543 /**
1544 * Extract and broadcast texture size.
1545 *
 * @param size_bld build context for the texture size vector (either
 *                 &bld->int_size_bld or &bld->float_size_bld)
 * @param coord_type type of the returned per-coord vectors (either
 *                   bld->int_coord_type or bld->coord_type)
1550 * @param size vector with the texture size (width, height, depth)
1551 */
1552 void
lp_build_extract_image_sizes(struct lp_build_sample_context * bld,struct lp_build_context * size_bld,struct lp_type coord_type,LLVMValueRef size,LLVMValueRef * out_width,LLVMValueRef * out_height,LLVMValueRef * out_depth)1553 lp_build_extract_image_sizes(struct lp_build_sample_context *bld,
1554 struct lp_build_context *size_bld,
1555 struct lp_type coord_type,
1556 LLVMValueRef size,
1557 LLVMValueRef *out_width,
1558 LLVMValueRef *out_height,
1559 LLVMValueRef *out_depth)
1560 {
1561 const unsigned dims = bld->dims;
1562 LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
1563 struct lp_type size_type = size_bld->type;
1564
1565 if (bld->num_mips == 1) {
1566 *out_width = lp_build_extract_broadcast(bld->gallivm,
1567 size_type,
1568 coord_type,
1569 size,
1570 LLVMConstInt(i32t, 0, 0));
1571 if (dims >= 2) {
1572 *out_height = lp_build_extract_broadcast(bld->gallivm,
1573 size_type,
1574 coord_type,
1575 size,
1576 LLVMConstInt(i32t, 1, 0));
1577 if (dims == 3) {
1578 *out_depth = lp_build_extract_broadcast(bld->gallivm,
1579 size_type,
1580 coord_type,
1581 size,
1582 LLVMConstInt(i32t, 2, 0));
1583 }
1584 }
1585 } else {
1586 unsigned num_quads = bld->coord_bld.type.length / 4;
1587
1588 if (dims == 1) {
1589 *out_width = size;
1590 } else if (bld->num_mips == num_quads) {
1591 *out_width = lp_build_swizzle_scalar_aos(size_bld, size, 0, 4);
1592 if (dims >= 2) {
1593 *out_height = lp_build_swizzle_scalar_aos(size_bld, size, 1, 4);
1594 if (dims == 3) {
1595 *out_depth = lp_build_swizzle_scalar_aos(size_bld, size, 2, 4);
1596 }
1597 }
1598 } else {
1599 assert(bld->num_mips == bld->coord_type.length);
1600 *out_width = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1601 coord_type, size, 0);
1602 if (dims >= 2) {
1603 *out_height = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1604 coord_type, size, 1);
1605 if (dims == 3) {
1606 *out_depth = lp_build_pack_aos_scalars(bld->gallivm, size_type,
1607 coord_type, size, 2);
1608 }
1609 }
1610 }
1611 }
1612 }
1613
1614
1615 /**
1616 * Unnormalize coords.
1617 *
1618 * @param flt_size vector with the integer texture size (width, height, depth)
1619 */
1620 void
lp_build_unnormalized_coords(struct lp_build_sample_context * bld,LLVMValueRef flt_size,LLVMValueRef * s,LLVMValueRef * t,LLVMValueRef * r)1621 lp_build_unnormalized_coords(struct lp_build_sample_context *bld,
1622 LLVMValueRef flt_size,
1623 LLVMValueRef *s,
1624 LLVMValueRef *t,
1625 LLVMValueRef *r)
1626 {
1627 const unsigned dims = bld->dims;
1628 LLVMValueRef width;
1629 LLVMValueRef height = NULL;
1630 LLVMValueRef depth = NULL;
1631
1632 lp_build_extract_image_sizes(bld,
1633 &bld->float_size_bld,
1634 bld->coord_type,
1635 flt_size,
1636 &width,
1637 &height,
1638 &depth);
1639
1640 /* s = s * width, t = t * height */
1641 *s = lp_build_mul(&bld->coord_bld, *s, width);
1642 if (dims >= 2) {
1643 *t = lp_build_mul(&bld->coord_bld, *t, height);
1644 if (dims >= 3) {
1645 *r = lp_build_mul(&bld->coord_bld, *r, depth);
1646 }
1647 }
1648 }
1649
1650
1651 /**
1652 * Generate new coords and faces for cubemap texels falling off the face.
1653 *
1654 * @param face face (center) of the pixel
1655 * @param x0 lower x coord
1656 * @param x1 higher x coord (must be x0 + 1)
1657 * @param y0 lower y coord
1658 * @param y1 higher y coord (must be x0 + 1)
1659 * @param max_coord texture cube (level) size - 1
1660 * @param next_faces new face values when falling off
1661 * @param next_xcoords new x coord values when falling off
1662 * @param next_ycoords new y coord values when falling off
1663 *
1664 * The arrays hold the new values when under/overflow of
1665 * lower x, higher x, lower y, higher y coord would occur (in this order).
1666 * next_xcoords/next_ycoords have two entries each (for both new lower and
1667 * higher coord).
1668 */
1669 void
lp_build_cube_new_coords(struct lp_build_context * ivec_bld,LLVMValueRef face,LLVMValueRef x0,LLVMValueRef x1,LLVMValueRef y0,LLVMValueRef y1,LLVMValueRef max_coord,LLVMValueRef next_faces[4],LLVMValueRef next_xcoords[4][2],LLVMValueRef next_ycoords[4][2])1670 lp_build_cube_new_coords(struct lp_build_context *ivec_bld,
1671 LLVMValueRef face,
1672 LLVMValueRef x0,
1673 LLVMValueRef x1,
1674 LLVMValueRef y0,
1675 LLVMValueRef y1,
1676 LLVMValueRef max_coord,
1677 LLVMValueRef next_faces[4],
1678 LLVMValueRef next_xcoords[4][2],
1679 LLVMValueRef next_ycoords[4][2])
1680 {
1681 /*
1682 * Lookup tables aren't nice for simd code hence try some logic here.
1683 * (Note that while it would not be necessary to do per-sample (4) lookups
1684 * when using a LUT as it's impossible that texels fall off of positive
1685 * and negative edges simultaneously, it would however be necessary to
1686 * do 2 lookups for corner handling as in this case texels both fall off
1687 * of x and y axes.)
1688 */
1689 /*
1690 * Next faces (for face 012345):
1691 * x < 0.0 : 451110
1692 * x >= 1.0 : 540001
1693 * y < 0.0 : 225422
1694 * y >= 1.0 : 334533
1695 * Hence nfx+ (and nfy+) == nfx- (nfy-) xor 1
1696 * nfx-: face > 1 ? (face == 5 ? 0 : 1) : (4 + face & 1)
1697 * nfy+: face & ~4 > 1 ? face + 2 : 3;
1698 * This could also use pshufb instead, but would need (manually coded)
1699 * ssse3 intrinsic (llvm won't do non-constant shuffles).
1700 */
1701 struct gallivm_state *gallivm = ivec_bld->gallivm;
1702 LLVMValueRef sel, sel_f2345, sel_f23, sel_f2, tmpsel, tmp;
1703 LLVMValueRef faceand1, sel_fand1, maxmx0, maxmx1, maxmy0, maxmy1;
1704 LLVMValueRef c2 = lp_build_const_int_vec(gallivm, ivec_bld->type, 2);
1705 LLVMValueRef c3 = lp_build_const_int_vec(gallivm, ivec_bld->type, 3);
1706 LLVMValueRef c4 = lp_build_const_int_vec(gallivm, ivec_bld->type, 4);
1707 LLVMValueRef c5 = lp_build_const_int_vec(gallivm, ivec_bld->type, 5);
1708
1709 sel = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c5);
1710 tmpsel = lp_build_select(ivec_bld, sel, ivec_bld->zero, ivec_bld->one);
1711 sel_f2345 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, face, ivec_bld->one);
1712 faceand1 = lp_build_and(ivec_bld, face, ivec_bld->one);
1713 tmp = lp_build_add(ivec_bld, faceand1, c4);
1714 next_faces[0] = lp_build_select(ivec_bld, sel_f2345, tmpsel, tmp);
1715 next_faces[1] = lp_build_xor(ivec_bld, next_faces[0], ivec_bld->one);
1716
1717 tmp = lp_build_andnot(ivec_bld, face, c4);
1718 sel_f23 = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, tmp, ivec_bld->one);
1719 tmp = lp_build_add(ivec_bld, face, c2);
1720 next_faces[3] = lp_build_select(ivec_bld, sel_f23, tmp, c3);
1721 next_faces[2] = lp_build_xor(ivec_bld, next_faces[3], ivec_bld->one);
1722
1723 /*
1724 * new xcoords (for face 012345):
1725 * x < 0.0 : max max t max-t max max
1726 * x >= 1.0 : 0 0 max-t t 0 0
1727 * y < 0.0 : max 0 max-s s s max-s
1728 * y >= 1.0 : max 0 s max-s s max-s
1729 *
1730 * ncx[1] = face & ~4 > 1 ? (face == 2 ? max-t : t) : 0
1731 * ncx[0] = max - ncx[1]
1732 * ncx[3] = face > 1 ? (face & 1 ? max-s : s) : (face & 1) ? 0 : max
1733 * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1734 */
1735 sel_f2 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, face, c2);
1736 maxmy0 = lp_build_sub(ivec_bld, max_coord, y0);
1737 tmp = lp_build_select(ivec_bld, sel_f2, maxmy0, y0);
1738 next_xcoords[1][0] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1739 next_xcoords[0][0] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][0]);
1740 maxmy1 = lp_build_sub(ivec_bld, max_coord, y1);
1741 tmp = lp_build_select(ivec_bld, sel_f2, maxmy1, y1);
1742 next_xcoords[1][1] = lp_build_select(ivec_bld, sel_f23, tmp, ivec_bld->zero);
1743 next_xcoords[0][1] = lp_build_sub(ivec_bld, max_coord, next_xcoords[1][1]);
1744
1745 sel_fand1 = lp_build_cmp(ivec_bld, PIPE_FUNC_EQUAL, faceand1, ivec_bld->one);
1746
1747 tmpsel = lp_build_select(ivec_bld, sel_fand1, ivec_bld->zero, max_coord);
1748 maxmx0 = lp_build_sub(ivec_bld, max_coord, x0);
1749 tmp = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1750 next_xcoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1751 tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][0]);
1752 next_xcoords[2][0] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][0]);
1753 maxmx1 = lp_build_sub(ivec_bld, max_coord, x1);
1754 tmp = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1755 next_xcoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1756 tmp = lp_build_sub(ivec_bld, max_coord, next_xcoords[3][1]);
1757 next_xcoords[2][1] = lp_build_select(ivec_bld, sel_f23, tmp, next_xcoords[3][1]);
1758
1759 /*
1760 * new ycoords (for face 012345):
1761 * x < 0.0 : t t 0 max t t
1762 * x >= 1.0 : t t 0 max t t
1763 * y < 0.0 : max-s s 0 max max 0
1764 * y >= 1.0 : s max-s 0 max 0 max
1765 *
1766 * ncy[0] = face & ~4 > 1 ? (face == 2 ? 0 : max) : t
1767 * ncy[1] = ncy[0]
1768 * ncy[3] = face > 1 ? (face & 1 ? max : 0) : (face & 1) ? max-s : max
1769 * ncx[2] = face & ~4 > 1 ? max - ncx[3] : ncx[3]
1770 */
1771 tmp = lp_build_select(ivec_bld, sel_f2, ivec_bld->zero, max_coord);
1772 next_ycoords[0][0] = lp_build_select(ivec_bld, sel_f23, tmp, y0);
1773 next_ycoords[1][0] = next_ycoords[0][0];
1774 next_ycoords[0][1] = lp_build_select(ivec_bld, sel_f23, tmp, y1);
1775 next_ycoords[1][1] = next_ycoords[0][1];
1776
1777 tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx0, x0);
1778 tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1779 next_ycoords[3][0] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1780 tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][0]);
1781 next_ycoords[2][0] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][0], tmp);
1782 tmpsel = lp_build_select(ivec_bld, sel_fand1, maxmx1, x1);
1783 tmp = lp_build_select(ivec_bld, sel_fand1, max_coord, ivec_bld->zero);
1784 next_ycoords[3][1] = lp_build_select(ivec_bld, sel_f2345, tmp, tmpsel);
1785 tmp = lp_build_sub(ivec_bld, max_coord, next_ycoords[3][1]);
1786 next_ycoords[2][1] = lp_build_select(ivec_bld, sel_f23, next_ycoords[3][1], tmp);
1787 }
1788
1789
1790 /** Helper used by lp_build_cube_lookup() */
1791 static LLVMValueRef
lp_build_cube_imapos(struct lp_build_context * coord_bld,LLVMValueRef coord)1792 lp_build_cube_imapos(struct lp_build_context *coord_bld, LLVMValueRef coord)
1793 {
1794 /* ima = +0.5 / abs(coord); */
1795 LLVMValueRef posHalf = lp_build_const_vec(coord_bld->gallivm, coord_bld->type, 0.5);
1796 LLVMValueRef absCoord = lp_build_abs(coord_bld, coord);
1797 /* avoid div by zero */
1798 LLVMValueRef sel = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, absCoord, coord_bld->zero);
1799 LLVMValueRef div = lp_build_div(coord_bld, posHalf, absCoord);
1800 LLVMValueRef ima = lp_build_select(coord_bld, sel, div, coord_bld->zero);
1801 return ima;
1802 }
1803
1804
1805 /** Helper for doing 3-wise selection.
1806 * Returns sel1 ? val2 : (sel0 ? val0 : val1).
1807 */
1808 static LLVMValueRef
lp_build_select3(struct lp_build_context * sel_bld,LLVMValueRef sel0,LLVMValueRef sel1,LLVMValueRef val0,LLVMValueRef val1,LLVMValueRef val2)1809 lp_build_select3(struct lp_build_context *sel_bld,
1810 LLVMValueRef sel0,
1811 LLVMValueRef sel1,
1812 LLVMValueRef val0,
1813 LLVMValueRef val1,
1814 LLVMValueRef val2)
1815 {
1816 LLVMValueRef tmp = lp_build_select(sel_bld, sel0, val0, val1);
1817 return lp_build_select(sel_bld, sel1, val2, tmp);
1818 }
1819
1820
/**
 * Generate code to do cube face selection and compute per-face texcoords.
 *
 * coords[0..2] hold the incoming s,t,r cube coordinates.  On return,
 * coords[0]/coords[1] hold the projected face coords mapped to [0,1]
 * (face_coord * 0.5/|ma| + 0.5) and coords[2] holds the integer face
 * index, with the major axis' sign bit OR'ed in to select the NEG face.
 * If need_derivs is set, per-face derivatives are written to derivs_out,
 * taken from derivs_in when provided, else computed with quad ddx/ddy.
 */
void
lp_build_cube_lookup(struct lp_build_sample_context *bld,
                     LLVMValueRef *coords,
                     const struct lp_derivatives *derivs_in, /* optional */
                     struct lp_derivatives *derivs_out, /* optional */
                     bool need_derivs)
{
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMValueRef si, ti, ri;

   /*
    * Do per-pixel face selection. We cannot however (as we used to do)
    * simply calculate the derivs afterwards (which is very bogus for
    * explicit derivs btw) because the values would be "random" when
    * not all pixels lie on the same face.
    */
   struct lp_build_context *cint_bld = &bld->int_coord_bld;
   struct lp_type intctype = cint_bld->type;
   LLVMTypeRef coord_vec_type = coord_bld->vec_type;
   LLVMTypeRef cint_vec_type = cint_bld->vec_type;
   LLVMValueRef as, at, ar, face, face_s, face_t;
   LLVMValueRef as_ge_at, maxasat, ar_ge_as_at;
   LLVMValueRef snewx, tnewx, snewy, tnewy, snewz, tnewz;
   LLVMValueRef tnegi, rnegi;
   LLVMValueRef ma, mai, signma, signmabit, imahalfpos;
   LLVMValueRef posHalf = lp_build_const_vec(gallivm, coord_bld->type, 0.5);
   /* mask/shift constants for extracting the float sign bit as an int */
   LLVMValueRef signmask = lp_build_const_int_vec(gallivm, intctype,
                                                  1LL << (intctype.width - 1));
   LLVMValueRef signshift = lp_build_const_int_vec(gallivm, intctype,
                                                   intctype.width -1);
   LLVMValueRef facex = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_X);
   LLVMValueRef facey = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Y);
   LLVMValueRef facez = lp_build_const_int_vec(gallivm, intctype, PIPE_TEX_FACE_POS_Z);
   LLVMValueRef s = coords[0];
   LLVMValueRef t = coords[1];
   LLVMValueRef r = coords[2];

   /* the NEG enums must directly follow the POS ones since the face
    * index is formed below as POS-face | sign-of-major-axis */
   assert(PIPE_TEX_FACE_NEG_X == PIPE_TEX_FACE_POS_X + 1);
   assert(PIPE_TEX_FACE_NEG_Y == PIPE_TEX_FACE_POS_Y + 1);
   assert(PIPE_TEX_FACE_NEG_Z == PIPE_TEX_FACE_POS_Z + 1);

   /*
    * get absolute value (for x/y/z face selection) and sign bit
    * (for mirroring minor coords and pos/neg face selection)
    * of the original coords.
    */
   as = lp_build_abs(&bld->coord_bld, s);
   at = lp_build_abs(&bld->coord_bld, t);
   ar = lp_build_abs(&bld->coord_bld, r);

   /*
    * major face determination: select x if x > y else select y
    * select z if z >= max(x,y) else select previous result
    * if some axis are the same we chose z over y, y over x - the
    * dx10 spec seems to ask for it while OpenGL doesn't care (if we
    * wouldn't care could save a select or two if using different
    * compares and doing at_g_as_ar last since tnewx and tnewz are the
    * same).
    */
   as_ge_at = lp_build_cmp(coord_bld, PIPE_FUNC_GREATER, as, at);
   maxasat = lp_build_max(coord_bld, as, at);
   ar_ge_as_at = lp_build_cmp(coord_bld, PIPE_FUNC_GEQUAL, ar, maxasat);

   if (need_derivs) {
      /*
       * XXX: This is really really complex.
       * It is a bit overkill to use this for implicit derivatives as well,
       * no way this is worth the cost in practice, but seems to be the
       * only way for getting accurate and per-pixel lod values.
       */
      LLVMValueRef ima, imahalf, tmp, ddx[3], ddy[3];
      LLVMValueRef madx, mady, madxdivma, madydivma;
      LLVMValueRef sdxi, tdxi, rdxi, sdyi, tdyi, rdyi;
      LLVMValueRef tdxnegi, rdxnegi, tdynegi, rdynegi;
      LLVMValueRef sdxnewx, sdxnewy, sdxnewz, tdxnewx, tdxnewy, tdxnewz;
      LLVMValueRef sdynewx, sdynewy, sdynewz, tdynewx, tdynewy, tdynewz;
      LLVMValueRef face_sdx, face_tdx, face_sdy, face_tdy;
      /*
       * s = 1/2 * (sc / ma + 1)
       * t = 1/2 * (tc / ma + 1)
       *
       * s' = 1/2 * (sc' * ma - sc * ma') / ma^2
       * t' = 1/2 * (tc' * ma - tc * ma') / ma^2
       *
       * dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma
       * dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma
       * dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma
       * dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma
       */

      /* select ma, calculate ima */
      ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
      mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
      signmabit = LLVMBuildAnd(builder, mai, signmask, "");
      ima = lp_build_div(coord_bld, coord_bld->one, ma);
      imahalf = lp_build_mul(coord_bld, posHalf, ima);
      imahalfpos = lp_build_abs(coord_bld, imahalf);

      if (!derivs_in) {
         /* implicit derivatives: compute from the quad neighbors */
         ddx[0] = lp_build_ddx(coord_bld, s);
         ddx[1] = lp_build_ddx(coord_bld, t);
         ddx[2] = lp_build_ddx(coord_bld, r);
         ddy[0] = lp_build_ddy(coord_bld, s);
         ddy[1] = lp_build_ddy(coord_bld, t);
         ddy[2] = lp_build_ddy(coord_bld, r);
      } else {
         /* explicit derivatives supplied by the caller */
         ddx[0] = derivs_in->ddx[0];
         ddx[1] = derivs_in->ddx[1];
         ddx[2] = derivs_in->ddx[2];
         ddy[0] = derivs_in->ddy[0];
         ddy[1] = derivs_in->ddy[1];
         ddy[2] = derivs_in->ddy[2];
      }

      /* select major derivatives */
      madx = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddx[0], ddx[1], ddx[2]);
      mady = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, ddy[0], ddy[1], ddy[2]);

      /* bitcast coords and derivs to int so mirroring is xor with sign bit */
      si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
      ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
      ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");

      sdxi = LLVMBuildBitCast(builder, ddx[0], cint_vec_type, "");
      tdxi = LLVMBuildBitCast(builder, ddx[1], cint_vec_type, "");
      rdxi = LLVMBuildBitCast(builder, ddx[2], cint_vec_type, "");

      sdyi = LLVMBuildBitCast(builder, ddy[0], cint_vec_type, "");
      tdyi = LLVMBuildBitCast(builder, ddy[1], cint_vec_type, "");
      rdyi = LLVMBuildBitCast(builder, ddy[2], cint_vec_type, "");

      /*
       * compute all possible new s/t coords, which does the mirroring,
       * and do the same for derivs minor axes.
       * snewx = signma * -r;
       * tnewx = -t;
       * snewy = s;
       * tnewy = signma * r;
       * snewz = signma * s;
       * tnewz = -t;
       */
      tnegi = LLVMBuildXor(builder, ti, signmask, "");
      rnegi = LLVMBuildXor(builder, ri, signmask, "");
      tdxnegi = LLVMBuildXor(builder, tdxi, signmask, "");
      rdxnegi = LLVMBuildXor(builder, rdxi, signmask, "");
      tdynegi = LLVMBuildXor(builder, tdyi, signmask, "");
      rdynegi = LLVMBuildXor(builder, rdyi, signmask, "");

      snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
      tnewx = tnegi;
      sdxnewx = LLVMBuildXor(builder, signmabit, rdxnegi, "");
      tdxnewx = tdxnegi;
      sdynewx = LLVMBuildXor(builder, signmabit, rdynegi, "");
      tdynewx = tdynegi;

      snewy = si;
      tnewy = LLVMBuildXor(builder, signmabit, ri, "");
      sdxnewy = sdxi;
      tdxnewy = LLVMBuildXor(builder, signmabit, rdxi, "");
      sdynewy = sdyi;
      tdynewy = LLVMBuildXor(builder, signmabit, rdyi, "");

      snewz = LLVMBuildXor(builder, signmabit, si, "");
      tnewz = tnegi;
      sdxnewz = LLVMBuildXor(builder, signmabit, sdxi, "");
      tdxnewz = tdxnegi;
      sdynewz = LLVMBuildXor(builder, signmabit, sdyi, "");
      tdynewz = tdynegi;

      /* select the mirrored values */
      face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);
      face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
      face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
      face_sdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdxnewx, sdxnewy, sdxnewz);
      face_tdx = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdxnewx, tdxnewy, tdxnewz);
      face_sdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, sdynewx, sdynewy, sdynewz);
      face_tdy = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tdynewx, tdynewy, tdynewz);

      /* back to float for the projection math below */
      face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
      face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");
      face_sdx = LLVMBuildBitCast(builder, face_sdx, coord_vec_type, "");
      face_tdx = LLVMBuildBitCast(builder, face_tdx, coord_vec_type, "");
      face_sdy = LLVMBuildBitCast(builder, face_sdy, coord_vec_type, "");
      face_tdy = LLVMBuildBitCast(builder, face_tdy, coord_vec_type, "");

      /* deriv math, dx.s = 0.5 * (dx.sc - sc * dx.ma / ma) / ma */
      madxdivma = lp_build_mul(coord_bld, madx, ima);
      tmp = lp_build_mul(coord_bld, madxdivma, face_s);
      tmp = lp_build_sub(coord_bld, face_sdx, tmp);
      derivs_out->ddx[0] = lp_build_mul(coord_bld, tmp, imahalf);

      /* dx.t = 0.5 * (dx.tc - tc * dx.ma / ma) / ma */
      tmp = lp_build_mul(coord_bld, madxdivma, face_t);
      tmp = lp_build_sub(coord_bld, face_tdx, tmp);
      derivs_out->ddx[1] = lp_build_mul(coord_bld, tmp, imahalf);

      /* dy.s = 0.5 * (dy.sc - sc * dy.ma / ma) / ma */
      madydivma = lp_build_mul(coord_bld, mady, ima);
      tmp = lp_build_mul(coord_bld, madydivma, face_s);
      tmp = lp_build_sub(coord_bld, face_sdy, tmp);
      derivs_out->ddy[0] = lp_build_mul(coord_bld, tmp, imahalf);

      /* dy.t = 0.5 * (dy.tc - tc * dy.ma / ma) / ma */
      tmp = lp_build_mul(coord_bld, madydivma, face_t);
      tmp = lp_build_sub(coord_bld, face_tdy, tmp);
      derivs_out->ddy[1] = lp_build_mul(coord_bld, tmp, imahalf);

      /* add +1 for neg face: or in the (logically shifted) sign of ma */
      signma = LLVMBuildLShr(builder, mai, signshift, "");
      coords[2] = LLVMBuildOr(builder, face, signma, "face");

      /* project coords */
      face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
      face_t = lp_build_mul(coord_bld, face_t, imahalfpos);

      coords[0] = lp_build_add(coord_bld, face_s, posHalf);
      coords[1] = lp_build_add(coord_bld, face_t, posHalf);

      return;
   }

   /* no derivatives needed: same face selection/mirroring, minus deriv math */
   ma = lp_build_select3(coord_bld, as_ge_at, ar_ge_as_at, s, t, r);
   mai = LLVMBuildBitCast(builder, ma, cint_vec_type, "");
   signmabit = LLVMBuildAnd(builder, mai, signmask, "");

   si = LLVMBuildBitCast(builder, s, cint_vec_type, "");
   ti = LLVMBuildBitCast(builder, t, cint_vec_type, "");
   ri = LLVMBuildBitCast(builder, r, cint_vec_type, "");

   /*
    * compute all possible new s/t coords, which does the mirroring
    * snewx = signma * -r;
    * tnewx = -t;
    * snewy = s;
    * tnewy = signma * r;
    * snewz = signma * s;
    * tnewz = -t;
    */
   tnegi = LLVMBuildXor(builder, ti, signmask, "");
   rnegi = LLVMBuildXor(builder, ri, signmask, "");

   snewx = LLVMBuildXor(builder, signmabit, rnegi, "");
   tnewx = tnegi;

   snewy = si;
   tnewy = LLVMBuildXor(builder, signmabit, ri, "");

   snewz = LLVMBuildXor(builder, signmabit, si, "");
   tnewz = tnegi;

   /* select the mirrored values */
   face_s = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, snewx, snewy, snewz);
   face_t = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, tnewx, tnewy, tnewz);
   face = lp_build_select3(cint_bld, as_ge_at, ar_ge_as_at, facex, facey, facez);

   face_s = LLVMBuildBitCast(builder, face_s, coord_vec_type, "");
   face_t = LLVMBuildBitCast(builder, face_t, coord_vec_type, "");

   /* add +1 for neg face */
   /* XXX with AVX probably want to use another select here -
    * as long as we ensure vblendvps gets used we can actually
    * skip the comparison and just use sign as a "mask" directly.
    */
   signma = LLVMBuildLShr(builder, mai, signshift, "");
   coords[2] = LLVMBuildOr(builder, face, signma, "face");

   /* project coords */
   imahalfpos = lp_build_cube_imapos(coord_bld, ma);
   face_s = lp_build_mul(coord_bld, face_s, imahalfpos);
   face_t = lp_build_mul(coord_bld, face_t, imahalfpos);

   coords[0] = lp_build_add(coord_bld, face_s, posHalf);
   coords[1] = lp_build_add(coord_bld, face_t, posHalf);
}
2098
2099
2100 /**
2101 * Compute the partial offset of a pixel block along an arbitrary axis.
2102 *
2103 * @param coord coordinate in pixels
2104 * @param stride number of bytes between rows of successive pixel blocks
2105 * @param block_length number of pixels in a pixels block along the coordinate
2106 * axis
2107 * @param out_offset resulting relative offset of the pixel block in bytes
2108 * @param out_subcoord resulting sub-block pixel coordinate
2109 */
2110 void
lp_build_sample_partial_offset(struct lp_build_context * bld,unsigned block_length,LLVMValueRef coord,LLVMValueRef stride,LLVMValueRef * out_offset,LLVMValueRef * out_subcoord)2111 lp_build_sample_partial_offset(struct lp_build_context *bld,
2112 unsigned block_length,
2113 LLVMValueRef coord,
2114 LLVMValueRef stride,
2115 LLVMValueRef *out_offset,
2116 LLVMValueRef *out_subcoord)
2117 {
2118 LLVMBuilderRef builder = bld->gallivm->builder;
2119 LLVMValueRef offset;
2120 LLVMValueRef subcoord;
2121
2122 if (block_length == 1) {
2123 subcoord = bld->zero;
2124 } else {
2125 /*
2126 * Pixel blocks have power of two dimensions. LLVM should convert the
2127 * rem/div to bit arithmetic.
2128 * TODO: Verify this.
2129 * It does indeed BUT it does transform it to scalar (and back) when doing so
2130 * (using roughly extract, shift/and, mov, unpack) (llvm 2.7).
2131 * The generated code looks seriously unfunny and is quite expensive.
2132 */
2133 #if 0
2134 LLVMValueRef block_width = lp_build_const_int_vec(bld->type, block_length);
2135 subcoord = LLVMBuildURem(builder, coord, block_width, "");
2136 coord = LLVMBuildUDiv(builder, coord, block_width, "");
2137 #else
2138 unsigned logbase2 = util_logbase2(block_length);
2139 LLVMValueRef block_shift = lp_build_const_int_vec(bld->gallivm, bld->type, logbase2);
2140 LLVMValueRef block_mask = lp_build_const_int_vec(bld->gallivm, bld->type, block_length - 1);
2141 subcoord = LLVMBuildAnd(builder, coord, block_mask, "");
2142 coord = LLVMBuildLShr(builder, coord, block_shift, "");
2143 #endif
2144 }
2145
2146 offset = lp_build_mul(bld, coord, stride);
2147
2148 assert(out_offset);
2149 assert(out_subcoord);
2150
2151 *out_offset = offset;
2152 *out_subcoord = subcoord;
2153 }
2154
2155
2156 /**
2157 * Compute the offset of a pixel block.
2158 *
2159 * x, y, z, y_stride, z_stride are vectors, and they refer to pixels.
2160 *
2161 * Returns the relative offset and i,j sub-block coordinates
2162 */
2163 void
lp_build_sample_offset(struct lp_build_context * bld,const struct util_format_description * format_desc,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef y_stride,LLVMValueRef z_stride,LLVMValueRef * out_offset,LLVMValueRef * out_i,LLVMValueRef * out_j)2164 lp_build_sample_offset(struct lp_build_context *bld,
2165 const struct util_format_description *format_desc,
2166 LLVMValueRef x,
2167 LLVMValueRef y,
2168 LLVMValueRef z,
2169 LLVMValueRef y_stride,
2170 LLVMValueRef z_stride,
2171 LLVMValueRef *out_offset,
2172 LLVMValueRef *out_i,
2173 LLVMValueRef *out_j)
2174 {
2175 LLVMValueRef x_stride;
2176 LLVMValueRef offset;
2177
2178 x_stride = lp_build_const_vec(bld->gallivm, bld->type,
2179 format_desc->block.bits/8);
2180
2181 lp_build_sample_partial_offset(bld,
2182 format_desc->block.width,
2183 x, x_stride,
2184 &offset, out_i);
2185
2186 if (y && y_stride) {
2187 LLVMValueRef y_offset;
2188 lp_build_sample_partial_offset(bld,
2189 format_desc->block.height,
2190 y, y_stride,
2191 &y_offset, out_j);
2192 offset = lp_build_add(bld, offset, y_offset);
2193 } else {
2194 *out_j = bld->zero;
2195 }
2196
2197 if (z && z_stride) {
2198 LLVMValueRef z_offset;
2199 LLVMValueRef k;
2200 lp_build_sample_partial_offset(bld,
2201 1, /* pixel blocks are always 2D */
2202 z, z_stride,
2203 &z_offset, &k);
2204 offset = lp_build_add(bld, offset, z_offset);
2205 }
2206
2207 *out_offset = offset;
2208 }
2209
2210
2211
/**
 * Compute the offset of a pixel inside a tiled (sparse) texture.
 *
 * The texture is laid out as an array of fixed-size tiles: first the
 * linear index of the tile containing (x,y,z) is computed and scaled by
 * the tile byte size (1 << 16, i.e. 64KB per tile), then the offset of
 * the pixel block within its tile is added, analogous to
 * lp_build_sample_offset().
 *
 * x, y, z, width, height, z_stride are vectors referring to pixels;
 * width/height are only needed to derive the per-axis tile counts.
 * Returns the relative offset and i,j sub-block coordinates.
 */
void
lp_build_tiled_sample_offset(struct lp_build_context *bld,
                             enum pipe_format format,
                             const struct lp_static_texture_state *static_texture_state,
                             LLVMValueRef x,
                             LLVMValueRef y,
                             LLVMValueRef z,
                             LLVMValueRef width,
                             LLVMValueRef height,
                             LLVMValueRef z_stride,
                             LLVMValueRef *out_offset,
                             LLVMValueRef *out_i,
                             LLVMValueRef *out_j)
{
   struct gallivm_state *gallivm = bld->gallivm;
   LLVMBuilderRef builder = gallivm->builder;

   assert(static_texture_state->tiled);

   /* dimensionality of the underlying resource (determines tile shape) */
   uint32_t res_dimensions = 1;
   switch (static_texture_state->res_target) {
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_2D_ARRAY:
      res_dimensions = 2;
      break;
   case PIPE_TEXTURE_3D:
      res_dimensions = 3;
      break;
   default:
      break;
   }

   /* dimensionality of the view (determines which coords participate) */
   uint32_t dimensions = 1;
   switch (static_texture_state->target) {
   case PIPE_TEXTURE_2D:
   case PIPE_TEXTURE_CUBE:
   case PIPE_TEXTURE_RECT:
   case PIPE_TEXTURE_2D_ARRAY:
      dimensions = 2;
      break;
   case PIPE_TEXTURE_3D:
      dimensions = 3;
      break;
   default:
      break;
   }

   uint32_t block_size[3] = {
      util_format_get_blockwidth(format),
      util_format_get_blockheight(format),
      util_format_get_blockdepth(format),
   };

   /* sparse tile extent per axis, in pixels (tilesize is in blocks) */
   uint32_t sparse_tile_size[3] = {
      util_format_get_tilesize(format, res_dimensions, static_texture_state->tiled_samples, 0) * block_size[0],
      util_format_get_tilesize(format, res_dimensions, static_texture_state->tiled_samples, 1) * block_size[1],
      util_format_get_tilesize(format, res_dimensions, static_texture_state->tiled_samples, 2) * block_size[2],
   };

   LLVMValueRef sparse_tile_size_log2[3] = {
      lp_build_const_vec(gallivm, bld->type, util_logbase2(sparse_tile_size[0])),
      lp_build_const_vec(gallivm, bld->type, util_logbase2(sparse_tile_size[1])),
      lp_build_const_vec(gallivm, bld->type, util_logbase2(sparse_tile_size[2])),
   };

   /* linear tile index: tx + ty * tiles_per_row (+ tz * tiles_per_slice) */
   LLVMValueRef tile_index = LLVMBuildLShr(builder, x, sparse_tile_size_log2[0], "");

   if (y && dimensions > 1) {
      /* tiles per row, rounding the width up to whole tiles */
      LLVMValueRef x_tile_count = lp_build_add(bld, width, lp_build_const_vec(gallivm, bld->type, sparse_tile_size[0] - 1));
      x_tile_count = LLVMBuildLShr(builder, x_tile_count, sparse_tile_size_log2[0], "");
      LLVMValueRef y_tile = LLVMBuildLShr(builder, y, sparse_tile_size_log2[1], "");
      tile_index = lp_build_add(bld, tile_index, lp_build_mul(bld, y_tile, x_tile_count));

      if (z && dimensions > 2) {
         /* tiles per slice = tiles per row * rows of tiles */
         LLVMValueRef y_tile_count = lp_build_add(bld, height, lp_build_const_vec(gallivm, bld->type, sparse_tile_size[1] - 1));
         y_tile_count = LLVMBuildLShr(builder, y_tile_count, sparse_tile_size_log2[1], "");
         LLVMValueRef z_tile = LLVMBuildLShr(builder, z, sparse_tile_size_log2[2], "");
         tile_index = lp_build_add(bld, tile_index, lp_build_mul(bld, z_tile, lp_build_mul(bld, x_tile_count, y_tile_count)));
      }
   }

   /* byte offset of the tile itself (tiles are 1 << 16 bytes each) */
   LLVMValueRef offset = LLVMBuildShl(builder, tile_index, lp_build_const_vec(gallivm, bld->type, 16), "");

   LLVMValueRef sparse_tile_masks[3] = {
      lp_build_const_vec(gallivm, bld->type, sparse_tile_size[0] - 1),
      lp_build_const_vec(gallivm, bld->type, sparse_tile_size[1] - 1),
      lp_build_const_vec(gallivm, bld->type, sparse_tile_size[2] - 1),
   };

   /* from here on work with the coords relative to the containing tile */
   x = LLVMBuildAnd(builder, x, sparse_tile_masks[0], "");
   LLVMValueRef x_stride = lp_build_const_vec(gallivm, bld->type, util_format_get_blocksize(format));

   LLVMValueRef x_offset;
   lp_build_sample_partial_offset(bld, block_size[0],
                                  x, x_stride, &x_offset, out_i);
   offset = lp_build_add(bld, offset, x_offset);

   if (y && dimensions > 1) {
      y = LLVMBuildAnd(builder, y, sparse_tile_masks[1], "");
      /* row stride within a tile: bytes per block * blocks per tile row */
      LLVMValueRef y_stride = lp_build_const_vec(gallivm, bld->type, util_format_get_blocksize(format) *
                                                 sparse_tile_size[0] / block_size[0]);

      LLVMValueRef y_offset;
      lp_build_sample_partial_offset(bld, block_size[1],
                                     y, y_stride, &y_offset, out_j);
      offset = lp_build_add(bld, offset, y_offset);
   } else {
      *out_j = bld->zero;
   }

   if (z && (z_stride || dimensions > 2)) {
      if (dimensions > 2) {
         /* 3D tile: slice stride within the tile overrides caller's z_stride */
         z = LLVMBuildAnd(builder, z, sparse_tile_masks[2], "");
         z_stride = lp_build_const_vec(gallivm, bld->type, util_format_get_blocksize(format) *
                                       sparse_tile_size[0] / block_size[0] *
                                       sparse_tile_size[1] / block_size[1]);
      }

      LLVMValueRef z_offset;
      LLVMValueRef k;
      lp_build_sample_partial_offset(bld, 1, z, z_stride, &z_offset, &k);
      offset = lp_build_add(bld, offset, z_offset);
   }

   *out_offset = offset;
}
2340
2341
2342 static LLVMValueRef
lp_build_sample_min(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2343 lp_build_sample_min(struct lp_build_context *bld,
2344 LLVMValueRef x,
2345 LLVMValueRef v0,
2346 LLVMValueRef v1)
2347 {
2348 /* if the incoming LERP weight is 0 then the min/max
2349 * should ignore that value. */
2350 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2351 bld->type,
2352 PIPE_FUNC_NOTEQUAL,
2353 x, bld->zero);
2354 LLVMValueRef min = lp_build_min(bld, v0, v1);
2355
2356 return lp_build_select(bld, mask, min, v0);
2357 }
2358
2359
2360 static LLVMValueRef
lp_build_sample_max(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef v0,LLVMValueRef v1)2361 lp_build_sample_max(struct lp_build_context *bld,
2362 LLVMValueRef x,
2363 LLVMValueRef v0,
2364 LLVMValueRef v1)
2365 {
2366 /* if the incoming LERP weight is 0 then the min/max
2367 * should ignore that value. */
2368 LLVMValueRef mask = lp_build_compare(bld->gallivm,
2369 bld->type,
2370 PIPE_FUNC_NOTEQUAL,
2371 x, bld->zero);
2372 LLVMValueRef max = lp_build_max(bld, v0, v1);
2373
2374 return lp_build_select(bld, mask, max, v0);
2375 }
2376
2377
2378 static LLVMValueRef
lp_build_sample_min_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2379 lp_build_sample_min_2d(struct lp_build_context *bld,
2380 LLVMValueRef x,
2381 LLVMValueRef y,
2382 LLVMValueRef a,
2383 LLVMValueRef b,
2384 LLVMValueRef c,
2385 LLVMValueRef d)
2386 {
2387 LLVMValueRef v0 = lp_build_sample_min(bld, x, a, b);
2388 LLVMValueRef v1 = lp_build_sample_min(bld, x, c, d);
2389 return lp_build_sample_min(bld, y, v0, v1);
2390 }
2391
2392
2393 static LLVMValueRef
lp_build_sample_max_2d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d)2394 lp_build_sample_max_2d(struct lp_build_context *bld,
2395 LLVMValueRef x,
2396 LLVMValueRef y,
2397 LLVMValueRef a,
2398 LLVMValueRef b,
2399 LLVMValueRef c,
2400 LLVMValueRef d)
2401 {
2402 LLVMValueRef v0 = lp_build_sample_max(bld, x, a, b);
2403 LLVMValueRef v1 = lp_build_sample_max(bld, x, c, d);
2404 return lp_build_sample_max(bld, y, v0, v1);
2405 }
2406
2407
2408 static LLVMValueRef
lp_build_sample_min_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2409 lp_build_sample_min_3d(struct lp_build_context *bld,
2410 LLVMValueRef x,
2411 LLVMValueRef y,
2412 LLVMValueRef z,
2413 LLVMValueRef a, LLVMValueRef b,
2414 LLVMValueRef c, LLVMValueRef d,
2415 LLVMValueRef e, LLVMValueRef f,
2416 LLVMValueRef g, LLVMValueRef h)
2417 {
2418 LLVMValueRef v0 = lp_build_sample_min_2d(bld, x, y, a, b, c, d);
2419 LLVMValueRef v1 = lp_build_sample_min_2d(bld, x, y, e, f, g, h);
2420 return lp_build_sample_min(bld, z, v0, v1);
2421 }
2422
2423
2424 static LLVMValueRef
lp_build_sample_max_3d(struct lp_build_context * bld,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef a,LLVMValueRef b,LLVMValueRef c,LLVMValueRef d,LLVMValueRef e,LLVMValueRef f,LLVMValueRef g,LLVMValueRef h)2425 lp_build_sample_max_3d(struct lp_build_context *bld,
2426 LLVMValueRef x,
2427 LLVMValueRef y,
2428 LLVMValueRef z,
2429 LLVMValueRef a, LLVMValueRef b,
2430 LLVMValueRef c, LLVMValueRef d,
2431 LLVMValueRef e, LLVMValueRef f,
2432 LLVMValueRef g, LLVMValueRef h)
2433 {
2434 LLVMValueRef v0 = lp_build_sample_max_2d(bld, x, y, a, b, c, d);
2435 LLVMValueRef v1 = lp_build_sample_max_2d(bld, x, y, e, f, g, h);
2436 return lp_build_sample_max(bld, z, v0, v1);
2437 }
2438
2439
2440 void
lp_build_reduce_filter(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * out)2441 lp_build_reduce_filter(struct lp_build_context *bld,
2442 enum pipe_tex_reduction_mode mode,
2443 unsigned flags,
2444 unsigned num_chan,
2445 LLVMValueRef x,
2446 LLVMValueRef *v00,
2447 LLVMValueRef *v01,
2448 LLVMValueRef *out)
2449 {
2450 unsigned chan;
2451 switch (mode) {
2452 case PIPE_TEX_REDUCTION_MIN:
2453 for (chan = 0; chan < num_chan; chan++)
2454 out[chan] = lp_build_sample_min(bld, x, v00[chan], v01[chan]);
2455 break;
2456 case PIPE_TEX_REDUCTION_MAX:
2457 for (chan = 0; chan < num_chan; chan++)
2458 out[chan] = lp_build_sample_max(bld, x, v00[chan], v01[chan]);
2459 break;
2460 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2461 default:
2462 for (chan = 0; chan < num_chan; chan++)
2463 out[chan] = lp_build_lerp(bld, x, v00[chan], v01[chan], flags);
2464 break;
2465 }
2466 }
2467
2468
2469 void
lp_build_reduce_filter_2d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef * v00,LLVMValueRef * v01,LLVMValueRef * v10,LLVMValueRef * v11,LLVMValueRef * out)2470 lp_build_reduce_filter_2d(struct lp_build_context *bld,
2471 enum pipe_tex_reduction_mode mode,
2472 unsigned flags,
2473 unsigned num_chan,
2474 LLVMValueRef x,
2475 LLVMValueRef y,
2476 LLVMValueRef *v00,
2477 LLVMValueRef *v01,
2478 LLVMValueRef *v10,
2479 LLVMValueRef *v11,
2480 LLVMValueRef *out)
2481 {
2482 switch (mode) {
2483 case PIPE_TEX_REDUCTION_MIN:
2484 for (unsigned chan = 0; chan < num_chan; chan++)
2485 out[chan] = lp_build_sample_min_2d(bld, x, y, v00[chan], v01[chan],
2486 v10[chan], v11[chan]);
2487 break;
2488 case PIPE_TEX_REDUCTION_MAX:
2489 for (unsigned chan = 0; chan < num_chan; chan++)
2490 out[chan] = lp_build_sample_max_2d(bld, x, y, v00[chan], v01[chan],
2491 v10[chan], v11[chan]);
2492 break;
2493 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2494 default:
2495 for (unsigned chan = 0; chan < num_chan; chan++)
2496 out[chan] = lp_build_lerp_2d(bld, x, y, v00[chan], v01[chan],
2497 v10[chan], v11[chan], flags);
2498 break;
2499 }
2500 }
2501
2502
2503 void
lp_build_reduce_filter_3d(struct lp_build_context * bld,enum pipe_tex_reduction_mode mode,unsigned flags,unsigned num_chan,LLVMValueRef x,LLVMValueRef y,LLVMValueRef z,LLVMValueRef * v000,LLVMValueRef * v001,LLVMValueRef * v010,LLVMValueRef * v011,LLVMValueRef * v100,LLVMValueRef * v101,LLVMValueRef * v110,LLVMValueRef * v111,LLVMValueRef * out)2504 lp_build_reduce_filter_3d(struct lp_build_context *bld,
2505 enum pipe_tex_reduction_mode mode,
2506 unsigned flags,
2507 unsigned num_chan,
2508 LLVMValueRef x,
2509 LLVMValueRef y,
2510 LLVMValueRef z,
2511 LLVMValueRef *v000,
2512 LLVMValueRef *v001,
2513 LLVMValueRef *v010,
2514 LLVMValueRef *v011,
2515 LLVMValueRef *v100,
2516 LLVMValueRef *v101,
2517 LLVMValueRef *v110,
2518 LLVMValueRef *v111,
2519 LLVMValueRef *out)
2520 {
2521 switch (mode) {
2522 case PIPE_TEX_REDUCTION_MIN:
2523 for (unsigned chan = 0; chan < num_chan; chan++)
2524 out[chan] = lp_build_sample_min_3d(bld, x, y, z,
2525 v000[chan], v001[chan], v010[chan], v011[chan],
2526 v100[chan], v101[chan], v110[chan], v111[chan]);
2527 break;
2528 case PIPE_TEX_REDUCTION_MAX:
2529 for (unsigned chan = 0; chan < num_chan; chan++)
2530 out[chan] = lp_build_sample_max_3d(bld, x, y, z,
2531 v000[chan], v001[chan], v010[chan], v011[chan],
2532 v100[chan], v101[chan], v110[chan], v111[chan]);
2533 break;
2534 case PIPE_TEX_REDUCTION_WEIGHTED_AVERAGE:
2535 default:
2536 for (unsigned chan = 0; chan < num_chan; chan++)
2537 out[chan] = lp_build_lerp_3d(bld, x, y, z,
2538 v000[chan], v001[chan], v010[chan], v011[chan],
2539 v100[chan], v101[chan], v110[chan], v111[chan],
2540 flags);
2541 break;
2542 }
2543 }
2544
2545
/*
 * Gaussian weight lookup table for the anisotropic filter,
 * generated from:
 *    const float alpha = 2;
 *    for (unsigned i = 0; i < WEIGHT_LUT_SIZE; i++) {
 *       const float r2 = (float) i / (float) (WEIGHT_LUT_SIZE - 1);
 *       const float weight = (float)expf(-alpha * r2);
 *       aniso_filter_table[i] = weight;
 *    }
 * Index i corresponds to the normalized squared radius r2 = i / 1023.
 */
static const float aniso_filter_table[1024] = {
   1.000000, 0.998047, 0.996098, 0.994152, 0.992210, 0.990272, 0.988338, 0.986408,
   0.984481, 0.982559, 0.980640, 0.978724, 0.976813, 0.974905, 0.973001, 0.971100,
   0.969204, 0.967311, 0.965421, 0.963536, 0.961654, 0.959776, 0.957901, 0.956030,
   0.954163, 0.952299, 0.950439, 0.948583, 0.946730, 0.944881, 0.943036, 0.941194,
   0.939356, 0.937521, 0.935690, 0.933862, 0.932038, 0.930218, 0.928401, 0.926588,
   0.924778, 0.922972, 0.921169, 0.919370, 0.917575, 0.915782, 0.913994, 0.912209,
   0.910427, 0.908649, 0.906874, 0.905103, 0.903335, 0.901571, 0.899810, 0.898052,
   0.896298, 0.894548, 0.892801, 0.891057, 0.889317, 0.887580, 0.885846, 0.884116,
   0.882389, 0.880666, 0.878946, 0.877229, 0.875516, 0.873806, 0.872099, 0.870396,
   0.868696, 0.866999, 0.865306, 0.863616, 0.861929, 0.860245, 0.858565, 0.856888,
   0.855215, 0.853544, 0.851877, 0.850213, 0.848553, 0.846896, 0.845241, 0.843591,
   0.841943, 0.840299, 0.838657, 0.837019, 0.835385, 0.833753, 0.832124, 0.830499,
   0.828877, 0.827258, 0.825643, 0.824030, 0.822421, 0.820814, 0.819211, 0.817611,
   0.816014, 0.814420, 0.812830, 0.811242, 0.809658, 0.808076, 0.806498, 0.804923,
   0.803351, 0.801782, 0.800216, 0.798653, 0.797093, 0.795536, 0.793982, 0.792432,
   0.790884, 0.789339, 0.787798, 0.786259, 0.784723, 0.783191, 0.781661, 0.780134,
   0.778610, 0.777090, 0.775572, 0.774057, 0.772545, 0.771037, 0.769531, 0.768028,
   0.766528, 0.765030, 0.763536, 0.762045, 0.760557, 0.759071, 0.757589, 0.756109,
   0.754632, 0.753158, 0.751687, 0.750219, 0.748754, 0.747291, 0.745832, 0.744375,
   0.742921, 0.741470, 0.740022, 0.738577, 0.737134, 0.735694, 0.734258, 0.732823,
   0.731392, 0.729964, 0.728538, 0.727115, 0.725695, 0.724278, 0.722863, 0.721451,
   0.720042, 0.718636, 0.717232, 0.715831, 0.714433, 0.713038, 0.711645, 0.710255,
   0.708868, 0.707483, 0.706102, 0.704723, 0.703346, 0.701972, 0.700601, 0.699233,
   0.697867, 0.696504, 0.695144, 0.693786, 0.692431, 0.691079, 0.689729, 0.688382,
   0.687037, 0.685696, 0.684356, 0.683020, 0.681686, 0.680354, 0.679025, 0.677699,
   0.676376, 0.675054, 0.673736, 0.672420, 0.671107, 0.669796, 0.668488, 0.667182,
   0.665879, 0.664579, 0.663281, 0.661985, 0.660692, 0.659402, 0.658114, 0.656828,
   0.655546, 0.654265, 0.652987, 0.651712, 0.650439, 0.649169, 0.647901, 0.646635,
   0.645372, 0.644112, 0.642854, 0.641598, 0.640345, 0.639095, 0.637846, 0.636601,
   0.635357, 0.634116, 0.632878, 0.631642, 0.630408, 0.629177, 0.627948, 0.626721,
   0.625497, 0.624276, 0.623056, 0.621839, 0.620625, 0.619413, 0.618203, 0.616996,
   0.615790, 0.614588, 0.613387, 0.612189, 0.610994, 0.609800, 0.608609, 0.607421,
   0.606234, 0.605050, 0.603868, 0.602689, 0.601512, 0.600337, 0.599165, 0.597994,
   0.596826, 0.595661, 0.594497, 0.593336, 0.592177, 0.591021, 0.589866, 0.588714,
   0.587564, 0.586417, 0.585272, 0.584128, 0.582988, 0.581849, 0.580712, 0.579578,
   0.578446, 0.577317, 0.576189, 0.575064, 0.573940, 0.572819, 0.571701, 0.570584,
   0.569470, 0.568357, 0.567247, 0.566139, 0.565034, 0.563930, 0.562829, 0.561729,
   0.560632, 0.559537, 0.558444, 0.557354, 0.556265, 0.555179, 0.554094, 0.553012,
   0.551932, 0.550854, 0.549778, 0.548704, 0.547633, 0.546563, 0.545496, 0.544430,
   0.543367, 0.542306, 0.541246, 0.540189, 0.539134, 0.538081, 0.537030, 0.535981,
   0.534935, 0.533890, 0.532847, 0.531806, 0.530768, 0.529731, 0.528696, 0.527664,
   0.526633, 0.525604, 0.524578, 0.523553, 0.522531, 0.521510, 0.520492, 0.519475,
   0.518460, 0.517448, 0.516437, 0.515429, 0.514422, 0.513417, 0.512414, 0.511414,
   0.510415, 0.509418, 0.508423, 0.507430, 0.506439, 0.505450, 0.504462, 0.503477,
   0.502494, 0.501512, 0.500533, 0.499555, 0.498580, 0.497606, 0.496634, 0.495664,
   0.494696, 0.493730, 0.492765, 0.491803, 0.490842, 0.489884, 0.488927, 0.487972,
   0.487019, 0.486068, 0.485118, 0.484171, 0.483225, 0.482281, 0.481339, 0.480399,
   0.479461, 0.478524, 0.477590, 0.476657, 0.475726, 0.474797, 0.473870, 0.472944,
   0.472020, 0.471098, 0.470178, 0.469260, 0.468343, 0.467429, 0.466516, 0.465605,
   0.464695, 0.463788, 0.462882, 0.461978, 0.461075, 0.460175, 0.459276, 0.458379,
   0.457484, 0.456590, 0.455699, 0.454809, 0.453920, 0.453034, 0.452149, 0.451266,
   0.450384, 0.449505, 0.448627, 0.447751, 0.446876, 0.446003, 0.445132, 0.444263,
   0.443395, 0.442529, 0.441665, 0.440802, 0.439941, 0.439082, 0.438224, 0.437368,
   0.436514, 0.435662, 0.434811, 0.433961, 0.433114, 0.432268, 0.431424, 0.430581,
   0.429740, 0.428901, 0.428063, 0.427227, 0.426393, 0.425560, 0.424729, 0.423899,
   0.423071, 0.422245, 0.421420, 0.420597, 0.419776, 0.418956, 0.418137, 0.417321,
   0.416506, 0.415692, 0.414880, 0.414070, 0.413261, 0.412454, 0.411648, 0.410844,
   0.410042, 0.409241, 0.408442, 0.407644, 0.406848, 0.406053, 0.405260, 0.404469,
   0.403679, 0.402890, 0.402103, 0.401318, 0.400534, 0.399752, 0.398971, 0.398192,
   0.397414, 0.396638, 0.395863, 0.395090, 0.394319, 0.393548, 0.392780, 0.392013,
   0.391247, 0.390483, 0.389720, 0.388959, 0.388199, 0.387441, 0.386684, 0.385929,
   0.385175, 0.384423, 0.383672, 0.382923, 0.382175, 0.381429, 0.380684, 0.379940,
   0.379198, 0.378457, 0.377718, 0.376980, 0.376244, 0.375509, 0.374776, 0.374044,
   0.373313, 0.372584, 0.371856, 0.371130, 0.370405, 0.369682, 0.368960, 0.368239,
   0.367520, 0.366802, 0.366086, 0.365371, 0.364657, 0.363945, 0.363234, 0.362525,
   0.361817, 0.361110, 0.360405, 0.359701, 0.358998, 0.358297, 0.357597, 0.356899,
   0.356202, 0.355506, 0.354812, 0.354119, 0.353427, 0.352737, 0.352048, 0.351360,
   0.350674, 0.349989, 0.349306, 0.348623, 0.347942, 0.347263, 0.346585, 0.345908,
   0.345232, 0.344558, 0.343885, 0.343213, 0.342543, 0.341874, 0.341206, 0.340540,
   0.339874, 0.339211, 0.338548, 0.337887, 0.337227, 0.336568, 0.335911, 0.335255,
   0.334600, 0.333947, 0.333294, 0.332643, 0.331994, 0.331345, 0.330698, 0.330052,
   0.329408, 0.328764, 0.328122, 0.327481, 0.326842, 0.326203, 0.325566, 0.324930,
   0.324296, 0.323662, 0.323030, 0.322399, 0.321770, 0.321141, 0.320514, 0.319888,
   0.319263, 0.318639, 0.318017, 0.317396, 0.316776, 0.316157, 0.315540, 0.314924,
   0.314309, 0.313695, 0.313082, 0.312470, 0.311860, 0.311251, 0.310643, 0.310036,
   0.309431, 0.308827, 0.308223, 0.307621, 0.307021, 0.306421, 0.305822, 0.305225,
   0.304629, 0.304034, 0.303440, 0.302847, 0.302256, 0.301666, 0.301076, 0.300488,
   0.299902, 0.299316, 0.298731, 0.298148, 0.297565, 0.296984, 0.296404, 0.295825,
   0.295247, 0.294671, 0.294095, 0.293521, 0.292948, 0.292375, 0.291804, 0.291234,
   0.290666, 0.290098, 0.289531, 0.288966, 0.288401, 0.287838, 0.287276, 0.286715,
   0.286155, 0.285596, 0.285038, 0.284482, 0.283926, 0.283371, 0.282818, 0.282266,
   0.281714, 0.281164, 0.280615, 0.280067, 0.279520, 0.278974, 0.278429, 0.277885,
   0.277342, 0.276801, 0.276260, 0.275721, 0.275182, 0.274645, 0.274108, 0.273573,
   0.273038, 0.272505, 0.271973, 0.271442, 0.270912, 0.270382, 0.269854, 0.269327,
   0.268801, 0.268276, 0.267752, 0.267229, 0.266707, 0.266186, 0.265667, 0.265148,
   0.264630, 0.264113, 0.263597, 0.263082, 0.262568, 0.262056, 0.261544, 0.261033,
   0.260523, 0.260014, 0.259506, 0.259000, 0.258494, 0.257989, 0.257485, 0.256982,
   0.256480, 0.255979, 0.255479, 0.254980, 0.254482, 0.253985, 0.253489, 0.252994,
   0.252500, 0.252007, 0.251515, 0.251023, 0.250533, 0.250044, 0.249555, 0.249068,
   0.248582, 0.248096, 0.247611, 0.247128, 0.246645, 0.246163, 0.245683, 0.245203,
   0.244724, 0.244246, 0.243769, 0.243293, 0.242818, 0.242343, 0.241870, 0.241398,
   0.240926, 0.240456, 0.239986, 0.239517, 0.239049, 0.238583, 0.238117, 0.237651,
   0.237187, 0.236724, 0.236262, 0.235800, 0.235340, 0.234880, 0.234421, 0.233963,
   0.233506, 0.233050, 0.232595, 0.232141, 0.231688, 0.231235, 0.230783, 0.230333,
   0.229883, 0.229434, 0.228986, 0.228538, 0.228092, 0.227647, 0.227202, 0.226758,
   0.226315, 0.225873, 0.225432, 0.224992, 0.224552, 0.224114, 0.223676, 0.223239,
   0.222803, 0.222368, 0.221934, 0.221500, 0.221068, 0.220636, 0.220205, 0.219775,
   0.219346, 0.218917, 0.218490, 0.218063, 0.217637, 0.217212, 0.216788, 0.216364,
   0.215942, 0.215520, 0.215099, 0.214679, 0.214260, 0.213841, 0.213423, 0.213007,
   0.212591, 0.212175, 0.211761, 0.211347, 0.210935, 0.210523, 0.210111, 0.209701,
   0.209291, 0.208883, 0.208475, 0.208068, 0.207661, 0.207256, 0.206851, 0.206447,
   0.206044, 0.205641, 0.205239, 0.204839, 0.204439, 0.204039, 0.203641, 0.203243,
   0.202846, 0.202450, 0.202054, 0.201660, 0.201266, 0.200873, 0.200481, 0.200089,
   0.199698, 0.199308, 0.198919, 0.198530, 0.198143, 0.197756, 0.197369, 0.196984,
   0.196599, 0.196215, 0.195832, 0.195449, 0.195068, 0.194687, 0.194306, 0.193927,
   0.193548, 0.193170, 0.192793, 0.192416, 0.192041, 0.191665, 0.191291, 0.190917,
   0.190545, 0.190172, 0.189801, 0.189430, 0.189060, 0.188691, 0.188323, 0.187955,
   0.187588, 0.187221, 0.186856, 0.186491, 0.186126, 0.185763, 0.185400, 0.185038,
   0.184676, 0.184316, 0.183956, 0.183597, 0.183238, 0.182880, 0.182523, 0.182166,
   0.181811, 0.181455, 0.181101, 0.180747, 0.180394, 0.180042, 0.179690, 0.179339,
   0.178989, 0.178640, 0.178291, 0.177942, 0.177595, 0.177248, 0.176902, 0.176556,
   0.176211, 0.175867, 0.175524, 0.175181, 0.174839, 0.174497, 0.174157, 0.173816,
   0.173477, 0.173138, 0.172800, 0.172462, 0.172126, 0.171789, 0.171454, 0.171119,
   0.170785, 0.170451, 0.170118, 0.169786, 0.169454, 0.169124, 0.168793, 0.168463,
   0.168134, 0.167806, 0.167478, 0.167151, 0.166825, 0.166499, 0.166174, 0.165849,
   0.165525, 0.165202, 0.164879, 0.164557, 0.164236, 0.163915, 0.163595, 0.163275,
   0.162957, 0.162638, 0.162321, 0.162004, 0.161687, 0.161371, 0.161056, 0.160742,
   0.160428, 0.160114, 0.159802, 0.159489, 0.159178, 0.158867, 0.158557, 0.158247,
   0.157938, 0.157630, 0.157322, 0.157014, 0.156708, 0.156402, 0.156096, 0.155791,
   0.155487, 0.155183, 0.154880, 0.154578, 0.154276, 0.153975, 0.153674, 0.153374,
   0.153074, 0.152775, 0.152477, 0.152179, 0.151882, 0.151585, 0.151289, 0.150994,
   0.150699, 0.150404, 0.150111, 0.149817, 0.149525, 0.149233, 0.148941, 0.148650,
   0.148360, 0.148070, 0.147781, 0.147492, 0.147204, 0.146917, 0.146630, 0.146344,
   0.146058, 0.145772, 0.145488, 0.145204, 0.144920, 0.144637, 0.144354, 0.144072,
   0.143791, 0.143510, 0.143230, 0.142950, 0.142671, 0.142392, 0.142114, 0.141837,
   0.141560, 0.141283, 0.141007, 0.140732, 0.140457, 0.140183, 0.139909, 0.139636,
   0.139363, 0.139091, 0.138819, 0.138548, 0.138277, 0.138007, 0.137738, 0.137469,
   0.137200, 0.136932, 0.136665, 0.136398, 0.136131, 0.135865, 0.135600, 0.135335,
};
2683
2684
2685 const float *
lp_build_sample_aniso_filter_table(void)2686 lp_build_sample_aniso_filter_table(void)
2687 {
2688 return aniso_filter_table;
2689 }
2690