xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/llvmpipe/lp_linear.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "util/detect.h"
30 
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_rect.h"
35 #include "util/u_sse.h"
36 
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42 
43 
44 #if DETECT_ARCH_SSE
45 
46 
/* For debugging (LP_DEBUG=linear), shade areas of run-time fallback
 * with a fixed, easily recognised colour (0x808000ff).
 *
 * Fills the width x height pixel rectangle whose top-left pixel is (x, y).
 * The surface is addressed the same way lp_fs_linear_run() does it:
 * `color` is the base of the surface, `stride` is the distance between
 * rows in bytes and every pixel is 32 bits wide.  Pixels are stored
 * through a uint32_t pointer, so rows are expected to be 4-byte aligned.
 *
 * `state` is unused; it is kept so the signature mirrors the shader
 * runner that calls this.
 *
 * Always returns true, i.e. reports the region as handled.
 */
static bool
linear_fallback(const struct lp_rast_state *state,
                unsigned x, unsigned y,
                unsigned width, unsigned height,
                uint8_t *color,
                unsigned stride)
{
   const uint32_t col = 0x808000ff;

   (void)state;

   for (unsigned iy = 0; iy < height; iy++) {
      /* Start of this row of the region: row (y + iy), column x. */
      uint32_t *row = (uint32_t *)(color + (size_t)(y + iy) * stride) + x;

      for (unsigned ix = 0; ix < width; ix++) {
         row[ix] = col;
      }
   }

   return true;
}
69 
70 
71 /*
72  * Run our configurable linear shader pipeline:
73  * x,y is the surface position of the linear region, width, height is the size.
74  * Return TRUE for success, FALSE otherwise.
75  */
76 static bool
lp_fs_linear_run(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)77 lp_fs_linear_run(const struct lp_rast_state *state,
78                  unsigned x, unsigned y,
79                  unsigned width, unsigned height,
80                  const float (*a0)[4],
81                  const float (*dadx)[4],
82                  const float (*dady)[4],
83                  uint8_t *color,
84                  unsigned stride)
85 {
86    const struct lp_fragment_shader_variant *variant = state->variant;
87    const struct lp_tgsi_info *info = &variant->shader->info;
88    const struct lp_fragment_shader_variant_key *key = &variant->key;
89    bool rgba_order = (key->cbuf_format[0] == PIPE_FORMAT_R8G8B8A8_UNORM ||
90                       key->cbuf_format[0] == PIPE_FORMAT_R8G8B8X8_UNORM);
91    uint8_t constants[LP_MAX_LINEAR_CONSTANTS * 4];
92 
93    LP_DBG(DEBUG_RAST, "%s\n", __func__);
94 
95    /* Require constant w in these rectangles:
96     */
97    if (dadx[0][3] != 0.0f ||
98        dady[0][3] != 0.0f) {
99       if (LP_DEBUG & DEBUG_LINEAR2)
100          debug_printf("  -- w not constant\n");
101       goto fail;
102    }
103 
104    /* XXX: Per statechange:
105     */
106    int nr_consts = state->jit_resources.constants[0].num_elements;
107 
108    for (int i = 0; i < nr_consts; i++){
109       float val = state->jit_resources.constants[0].f[i];
110       if (val < 0.0f || val > 1.0f) {
111          if (LP_DEBUG & DEBUG_LINEAR2)
112             debug_printf("  -- const[%d] out of range %f\n", i, val);
113          goto fail;
114       }
115       constants[i] = (uint8_t)(val * 255.0f);
116    }
117 
118    struct lp_jit_linear_context jit;
119    jit.constants = (const uint8_t (*)[4])constants;
120 
121    if (!rgba_order) {
122       jit.blend_color =
123          state->jit_context.u8_blend_color[32] +
124          (state->jit_context.u8_blend_color[16] << 8) +
125          (state->jit_context.u8_blend_color[0] << 16) +
126          (state->jit_context.u8_blend_color[48] << 24);
127    } else {
128       jit.blend_color =
129          (state->jit_context.u8_blend_color[32] << 24) +
130          (state->jit_context.u8_blend_color[16] << 16) +
131          (state->jit_context.u8_blend_color[0] << 8) +
132          (state->jit_context.u8_blend_color[48] << 0);
133    }
134 
135    jit.alpha_ref_value = float_to_ubyte(state->jit_context.alpha_ref_value);
136 
137    /* XXX: Per primitive:
138     */
139    struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
140    const float oow = 1.0f / a0[0][3];
141    unsigned input_mask = variant->linear_input_mask;
142    while (input_mask) {
143       int i = u_bit_scan(&input_mask);
144       unsigned usage_mask = info->base.input_usage_mask[i];
145       bool perspective =
146             info->base.input_interpolate[i] == TGSI_INTERPOLATE_PERSPECTIVE ||
147             (info->base.input_interpolate[i] == TGSI_INTERPOLATE_COLOR &&
148              !variant->key.flatshade);
149       if (!lp_linear_init_interp(&interp[i],
150                                  x, y, width, height,
151                                  usage_mask,
152                                  perspective,
153                                  oow,
154                                  a0[i+1],
155                                  dadx[i+1],
156                                  dady[i+1])) {
157          if (LP_DEBUG & DEBUG_LINEAR2)
158             debug_printf("  -- init_interp(%d) failed\n", i);
159          goto fail;
160       }
161 
162       jit.inputs[i] = &interp[i].base;
163    }
164 
165    /* XXX: Per primitive: Initialize linear or nearest samplers:
166     */
167    struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
168    const int nr_tex = info->num_texs;
169    for (int i = 0; i < nr_tex; i++) {
170       const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
171       const unsigned tex_unit = tex_info->texture_unit;
172       const unsigned samp_unit = tex_info->sampler_unit;
173       //const unsigned fs_s_input = tex_info->coord[0].u.index;
174       //const unsigned fs_t_input = tex_info->coord[1].u.index;
175 
176       // xxx investigate why these fail in deqp-vk
177       //assert(variant->linear_input_mask & (1 << fs_s_input));
178       //assert(variant->linear_input_mask & (1 << fs_t_input));
179 
180       /* XXX: some texture coordinates are linear!
181        */
182       //boolean perspective = (info->base.input_interpolate[i] ==
183       //                       TGSI_INTERPOLATE_PERSPECTIVE);
184 
185       if (!lp_linear_init_sampler(&samp[i], tex_info,
186                   lp_fs_variant_key_sampler_idx(&variant->key, samp_unit),
187                   &state->jit_resources.textures[tex_unit],
188                   x, y, width, height, a0, dadx, dady, rgba_order)) {
189          if (LP_DEBUG & DEBUG_LINEAR2)
190             debug_printf("  -- init_sampler(%d) failed\n", i);
191          goto fail;
192       }
193 
194       jit.tex[i] = &samp[i].base;
195    }
196 
197    /* JIT function already does blending */
198    jit.color0 = color + x * 4 + y * stride;
199    lp_jit_linear_llvm_func jit_func = variant->jit_linear_llvm;
200 
201    for (unsigned iy = 0; iy < height; iy++) {
202       jit_func(&jit, 0, 0, width);  // x=0, y=0
203       jit.color0 += stride;
204    }
205 
206    return true;
207 
208 fail:
209    /* Visually distinguish this from other fallbacks:
210     */
211    if (LP_DEBUG & DEBUG_LINEAR) {
212       return linear_fallback(state, x, y, width, height, color, stride);
213    }
214 
215    return false;
216 }
217 
218 
219 static void
check_linear_interp_mask_a(struct lp_fragment_shader_variant * variant)220 check_linear_interp_mask_a(struct lp_fragment_shader_variant *variant)
221 {
222    const struct lp_tgsi_info *info = &variant->shader->info;
223    struct lp_jit_linear_context jit;
224 
225    struct lp_linear_sampler samp[LP_MAX_LINEAR_TEXTURES];
226    struct lp_linear_interp interp[LP_MAX_LINEAR_INPUTS];
227    uint8_t constants[LP_MAX_LINEAR_CONSTANTS][4];
228    alignas(16) uint8_t color0[TILE_SIZE*4];
229 
230    const int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
231    const int nr_tex = info->num_texs;
232 
233    LP_DBG(DEBUG_RAST, "%s\n", __func__);
234 
235    jit.constants = (const uint8_t (*)[4])constants;
236 
237    for (int i = 0; i < nr_tex; i++) {
238       lp_linear_init_noop_sampler(&samp[i]);
239       jit.tex[i] = &samp[i].base;
240    }
241 
242    for (int i = 0; i < nr_inputs; i++) {
243       lp_linear_init_noop_interp(&interp[i]);
244       jit.inputs[i] = &interp[i].base;
245    }
246 
247    jit.color0 = color0;
248 
249    (void)variant->jit_linear_llvm(&jit, 0, 0, 0);
250 
251    /* Find out which interpolators were called, and store this as a
252     * mask:
253     */
254    for (int i = 0; i < nr_inputs; i++) {
255       variant->linear_input_mask |= (interp[i].row[0] << i);
256    }
257 }
258 
259 
260 /* Until the above is working, look at texture information and guess
261  * that any input used as a texture coordinate is not used for
262  * anything else.
263  */
264 static void
check_linear_interp_mask_b(struct lp_fragment_shader_variant * variant)265 check_linear_interp_mask_b(struct lp_fragment_shader_variant *variant)
266 {
267    const struct lp_tgsi_info *info = &variant->shader->info;
268    int nr_inputs = info->base.file_max[TGSI_FILE_INPUT]+1;
269    int nr_tex = info->num_texs;
270    unsigned tex_mask = 0;
271    int i;
272 
273    LP_DBG(DEBUG_RAST, "%s\n", __func__);
274 
275    for (i = 0; i < nr_tex; i++) {
276       const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
277       const struct lp_tgsi_channel_info *schan = &tex_info->coord[0];
278       const struct lp_tgsi_channel_info *tchan = &tex_info->coord[1];
279       tex_mask |= 1 << schan->u.index;
280       tex_mask |= 1 << tchan->u.index;
281    }
282 
283    variant->linear_input_mask = ((1 << nr_inputs) - 1) & ~tex_mask;
284 }
285 
286 
287 void
lp_linear_check_variant(struct lp_fragment_shader_variant * variant)288 lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
289 {
290    const struct lp_fragment_shader_variant_key *key = &variant->key;
291    const struct lp_fragment_shader *shader = variant->shader;
292    const struct lp_tgsi_info *info = &shader->info;
293 
294    if (info->base.file_max[TGSI_FILE_CONSTANT] >= LP_MAX_LINEAR_CONSTANTS ||
295        info->base.file_max[TGSI_FILE_INPUT] >= LP_MAX_LINEAR_INPUTS) {
296       if (LP_DEBUG & DEBUG_LINEAR)
297          debug_printf("  -- too many inputs/constants\n");
298       goto fail;
299    }
300 
301    /* If we have a fastpath which implements the entire variant, use
302     * that.
303     */
304    if (lp_linear_check_fastpath(variant)) {
305       return;
306    }
307 
308    /* Otherwise, can we build up a spanline-based linear path for this
309     * variant?
310     */
311 
312    /* Check static sampler state.
313     */
314    for (unsigned i = 0; i < info->num_texs; i++) {
315       const struct lp_tgsi_texture_info *tex_info = &info->tex[i];
316       const unsigned unit = tex_info->sampler_unit;
317 
318       /* XXX: Relax this once setup premultiplies by oow:
319        */
320       if (info->base.input_interpolate[unit] != TGSI_INTERPOLATE_PERSPECTIVE) {
321          if (LP_DEBUG & DEBUG_LINEAR)
322             debug_printf(" -- samp[%d]: texcoord not perspective\n", i);
323          goto fail;
324       }
325 
326       struct lp_sampler_static_state *samp =
327          lp_fs_variant_key_sampler_idx(key, unit);
328       if (!lp_linear_check_sampler(samp, tex_info)) {
329          if (LP_DEBUG & DEBUG_LINEAR)
330             debug_printf(" -- samp[%d]: check_sampler failed\n", i);
331          goto fail;
332       }
333    }
334 
335    /* Check shader.  May not have been jitted.
336     */
337    if (variant->linear_function == NULL) {
338       if (LP_DEBUG & DEBUG_LINEAR)
339          debug_printf("  -- no linear shader\n");
340       goto fail;
341    }
342 
343    /* Hook in the catchall shader runner:
344     */
345    variant->jit_linear = lp_fs_linear_run;
346 
347    /* Figure out which inputs we don't need to interpolate (because
348     * they are only used as texture coordinates).  This is important
349     * as we can cope with texture coordinates which exceed 1.0, but
350     * cannot do so for regular inputs.
351     */
352    if (1)
353       check_linear_interp_mask_a(variant);
354    else
355       check_linear_interp_mask_b(variant);
356 
357 
358    if (0) {
359       lp_debug_fs_variant(variant);
360       debug_printf("linear input mask: 0x%x\n", variant->linear_input_mask);
361    }
362 
363    return;
364 
365 fail:
366    if (LP_DEBUG & DEBUG_LINEAR) {
367       lp_debug_fs_variant(variant);
368       debug_printf("    ----> no linear path for this variant\n");
369    }
370 }
371 
372 
373 #else
/* Build without DETECT_ARCH_SSE: the linear paths above are compiled out,
 * so this does nothing and the variant is left exactly as it was passed in.
 */
void
lp_linear_check_variant(struct lp_fragment_shader_variant *variant)
{
   (void)variant;
}
378 #endif
379