xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/llvmpipe/lp_state_fs_linear.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /**************************************************************************
2  *
3  * Copyright 2010-2021 VMware, Inc.
4  * All Rights Reserved.
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the
8  * "Software"), to deal in the Software without restriction, including
9  * without limitation the rights to use, copy, modify, merge, publish,
10  * distribute, sub license, and/or sell copies of the Software, and to
11  * permit persons to whom the Software is furnished to do so, subject to
12  * the following conditions:
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
17  * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
18  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20  * USE OR OTHER DEALINGS IN THE SOFTWARE.
21  *
22  * The above copyright notice and this permission notice (including the
23  * next paragraph) shall be included in all copies or substantial portions
24  * of the Software.
25  *
26  **************************************************************************/
27 
28 
29 #include "util/detect.h"
30 
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_pack_color.h"
34 #include "util/u_surface.h"
35 #include "util/u_sse.h"
36 
37 #include "lp_jit.h"
38 #include "lp_rast.h"
39 #include "lp_debug.h"
40 #include "lp_state_fs.h"
41 #include "lp_linear_priv.h"
42 
43 
44 #if DETECT_ARCH_SSE
45 
46 #include <emmintrin.h>
47 
48 
/* State for nearest-filtered row-at-a-time texture fetches.  One row
 * of texels is produced per fetch() call into the aligned out[]
 * buffer (max span is 64 pixels; see fetch_row()).
 */
struct nearest_sampler {
   alignas(16) uint32_t out[64];   /* fetched texel row; 16-byte aligned for SSE use */

   const struct lp_jit_texture *texture;
   float fsrc_x;                /* texel-space x at the rect origin (src_x0) */
   float fsrc_y;                /* texel-space y at the rect origin (src_y0) */
   float fdsdx;              /* d(src_x)/dx: texel x step per screen x */
   float fdsdy;              /* d(src_x)/dy: texel x step per screen y */
   float fdtdx;              /* d(src_y)/dx: texel y step per screen x */
   float fdtdy;              /* d(src_y)/dy: texel y step per screen y */
   int width;                /* row length in pixels */
   int y;                    /* next row index; incremented by each fetch */

   /* Row-fetch routine selected by init_nearest_sampler(). */
   const uint32_t *(*fetch)(struct nearest_sampler *samp);
};
64 
65 
/* Organize all the information needed for blending in one place.
 * Could have blend function pointer here, but we currently always
 * know which one we want to call.
 */
struct color_blend {
   const uint32_t *src;         /* incoming row (aligned, padded to 4) */
   uint8_t *color;              /* current destination row; advanced by stride per row */
   int stride;                  /* destination row stride in bytes */
   int width;                   /* the exact width */
};
76 
77 
/* Organize all the information needed for running each of the shaders
 * in one place.
 */
struct shader {
   alignas(16) uint32_t out0[64];  /* shader output row; 16-byte aligned for SSE */
   const uint32_t *src0;           /* first texel-row input */
   const uint32_t *src1;           /* optional second input */
   __m128i const0;                 /* per-variant constant operand */
   int width;                   /* rounded up to multiple of 4 */
};
88 
89 
90 /* For a row of pixels, perform add/one/inv_src_alpha (ie
91  * premultiplied alpha) blending between the incoming pixels and the
92  * destination buffer.
93  *
94  * Used to implement the BLIT_RGBA + blend shader, there are no
95  * operations from the pixel shader left to implement at this level -
96  * effectively the pixel shader was just a texture fetch which has
97  * already been performed.  This routine then purely implements
98  * blending.
99  */
100 static void
blend_premul(struct color_blend * blend)101 blend_premul(struct color_blend *blend)
102 {
103    const uint32_t *src = blend->src;  /* aligned */
104    uint32_t *dst = (uint32_t *)blend->color;      /* unaligned */
105    const int width = blend->width;
106    int i;
107    union { __m128i m128; uint ui[4]; } dstreg;
108 
109    blend->color += blend->stride;
110 
111    for (i = 0; i + 3 < width; i += 4) {
112       __m128i tmp;
113       tmp = _mm_loadu_si128((const __m128i *)&dst[i]);  /* UNALIGNED READ */
114       dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
115                                              tmp);
116       _mm_storeu_si128((__m128i *)&dst[i], dstreg.m128); /* UNALIGNED WRITE */
117    }
118 
119    if (i < width) {
120       int j;
121       for (j = 0; j < width - i ; j++) {
122          dstreg.ui[j] = dst[i+j];
123       }
124       dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
125                                              dstreg.m128);
126       for (; i < width; i++)
127          dst[i] = dstreg.ui[i&3];
128    }
129 }
130 
131 
132 static void
blend_noop(struct color_blend * blend)133 blend_noop(struct color_blend *blend)
134 {
135    memcpy(blend->color, blend->src, blend->width * sizeof(unsigned));
136    blend->color += blend->stride;
137 }
138 
139 
140 static void
init_blend(struct color_blend * blend,int x,int y,int width,int height,uint8_t * color,int stride)141 init_blend(struct color_blend *blend,
142            int x, int y, int width, int height,
143            uint8_t *color,
144            int stride)
145 {
146    blend->color = color + x * 4 + y * stride;
147    blend->stride = stride;
148    blend->width = width;
149 }
150 
151 
152 /*
153  * Perform nearest filtered lookup of a row of texels.  Texture lookup
154  * is assumed to be axis aligned but with arbitrary scaling.
155  *
156  * Texture coordinate interpolation is performed in 24.8 fixed point.
157  * Note that the longest span we will encounter is 64 pixels long,
158  * meaning that 8 fractional bits is more than sufficient to represent
159  * the shallowest gradient possible within this span.
160  *
161  * After 64 pixels (ie. in the next tile), the starting point will be
162  * recalculated with floating point arithmetic.
163  *
164  * XXX: migrate this to use Jose's quad blitter texture fetch routines.
165  */
166 static const uint32_t *
fetch_row(struct nearest_sampler * samp)167 fetch_row(struct nearest_sampler *samp)
168 {
169    const int y = samp->y++;
170    uint32_t *row = samp->out;
171    const struct lp_jit_texture *texture = samp->texture;
172    const int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
173    const uint32_t *src_row =
174       (const uint32_t *)((const uint8_t *)texture->base +
175                          yy * texture->row_stride[0]);
176    const int iscale_x = samp->fdsdx * 256;
177    const int width = samp->width;
178    int acc = samp->fsrc_x * 256 + 128;
179 
180    for (int i = 0; i < width; i++) {
181       row[i] = src_row[acc>>8];
182       acc += iscale_x;
183    }
184 
185    return row;
186 }
187 
188 
189 /* Version of fetch_row which can cope with texture edges.  In
190  * practise, aero never triggers this.
191  */
192 static const uint32_t *
fetch_row_clamped(struct nearest_sampler * samp)193 fetch_row_clamped(struct nearest_sampler *samp)
194 {
195    const int y = samp->y++;
196    uint32_t *row = samp->out;
197    const struct lp_jit_texture *texture = samp->texture;
198    const int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
199    const uint32_t *src_row =
200       (const uint32_t *)((const uint8_t *)texture->base +
201                          CLAMP(yy, 0, texture->height-1) *
202                          texture->row_stride[0]);
203    const float src_x0 = samp->fsrc_x;
204    const float scale_x = samp->fdsdx;
205    const int width = samp->width;
206 
207    for (int i = 0; i < width; i++) {
208       row[i] = src_row[CLAMP(util_iround(src_x0 + i * scale_x),
209                              0, texture->width - 1)];
210    }
211 
212    return row;
213 }
214 
215 /* It vary rarely happens that some non-axis-aligned texturing creeps
216  * into the linear path.  Handle it here.  The alternative would be
217  * more pre-checking or an option to fallback by returning false from
218  * jit_linear.
219  */
220 static const uint32_t *
fetch_row_xy_clamped(struct nearest_sampler * samp)221 fetch_row_xy_clamped(struct nearest_sampler *samp)
222 {
223    const int y = samp->y++;
224    uint32_t *row = samp->out;
225    const struct lp_jit_texture *texture = samp->texture;
226    const float yrow = samp->fsrc_y + samp->fdtdy * y;
227    const float xrow = samp->fsrc_x + samp->fdsdy * y;
228    const int width  = samp->width;
229 
230    for (int i = 0; i < width; i++) {
231       int yy = util_iround(yrow + samp->fdtdx * i);
232       int xx = util_iround(xrow + samp->fdsdx * i);
233 
234       const uint32_t *src_row =
235          (const uint32_t *)((const uint8_t *) texture->base +
236                             CLAMP(yy, 0, texture->height-1) *
237                             texture->row_stride[0]);
238 
239       row[i] = src_row[CLAMP(xx, 0, texture->width - 1)];
240    }
241 
242    return row;
243 }
244 
245 
/* Set up the nearest sampler for an axis-aligned (or nearly so)
 * stretch blit over a width x height rectangle starting at screen
 * position (x0, y0).
 *
 * s0/t0 are the texcoords at the viewport origin, dsdx/dsdy/dtdx/dtdy
 * their screen-space gradients, and w0/dwdx/dwdy the interpolated 1/w
 * term.  Returns false if the rect needs perspective (varying w),
 * which this fastpath cannot handle; the caller then falls back.
 */
static bool
init_nearest_sampler(struct nearest_sampler *samp,
                     const struct lp_jit_texture *texture,
                     int x0, int y0,
                     int width, int height,
                     float s0, float dsdx, float dsdy,
                     float t0, float dtdx, float dtdy,
                     float w0, float dwdx, float dwdy)
{
   const float oow = 1.0f / w0;

   /* Only a constant w can be folded into the gradients below. */
   if (dwdx != 0.0 || dwdy != 0.0)
      return false;

   samp->texture = texture;
   samp->width = width;
   /* Scale the normalized texcoord gradients into texel units,
    * folding in the (constant) 1/w. */
   samp->fdsdx = dsdx * texture->width * oow;
   samp->fdsdy = dsdy * texture->width * oow;
   samp->fdtdx = dtdx * texture->height * oow;
   samp->fdtdy = dtdy * texture->height * oow;
   /* Texel-space start position at (x0, y0); the -0.5 shifts from
    * pixel centers to nearest-texel sampling. */
   samp->fsrc_x = (samp->fdsdx * x0 +
                   samp->fdsdy * y0 +
                   s0 * texture->width * oow - 0.5f);

   samp->fsrc_y = (samp->fdtdx * x0 +
                   samp->fdtdy * y0 +
                   t0 * texture->height * oow - 0.5f);
   samp->y = 0;

   /* Because we want to permit consumers of this data to round up to
    * the next multiple of 4, and because we don't want valgrind to
    * complain about uninitialized reads, set the last bit of the
    * buffer to zero:
    */
   for (int i = width; i & 3; i++)
      samp->out[i] = 0;

   if (dsdy != 0 || dtdx != 0) {
      /* Arbitrary texture lookup:
       */
      samp->fetch = fetch_row_xy_clamped;
   } else {
      /* Axis aligned stretch blit, abitrary scaling factors including
       * flipped, minifying and magnifying:
       */
      int isrc_x = util_iround(samp->fsrc_x);
      int isrc_y = util_iround(samp->fsrc_y);
      int isrc_x1 = util_iround(samp->fsrc_x + width * samp->fdsdx);
      int isrc_y1 = util_iround(samp->fsrc_y + height * samp->fdtdy);

      /* Look at the maximum and minimum texture coordinates we will be
       * fetching and figure out if we need to use clamping.  There is
       * similar code in u_blit_sw.c which takes a better approach to
       * this which could be substituted later.
       */
      if (isrc_x  <= texture->width  && isrc_x  >= 0 &&
          isrc_y  <= texture->height && isrc_y  >= 0 &&
          isrc_x1 <= texture->width  && isrc_x1 >= 0 &&
          isrc_y1 <= texture->height && isrc_y1 >= 0) {
         samp->fetch = fetch_row;
      } else {
         samp->fetch = fetch_row_clamped;
      }
   }

   return true;
}
313 
314 
315 static const uint32_t *
shade_rgb1(struct shader * shader)316 shade_rgb1(struct shader *shader)
317 {
318    const __m128i rgb1 = _mm_set1_epi32(0xff000000);
319    const uint32_t *src0 = shader->src0;
320    uint32_t *dst = shader->out0;
321    int width = shader->width;
322    int i;
323 
324    for (i = 0; i + 3 < width; i += 4) {
325       __m128i s = *(const __m128i *)&src0[i];
326       *(__m128i *)&dst[i] = _mm_or_si128(s, rgb1);
327    }
328 
329    return shader->out0;
330 }
331 
332 
333 static void
init_shader(struct shader * shader,int x,int y,int width,int height)334 init_shader(struct shader *shader,
335            int x, int y, int width, int height)
336 {
337    shader->width = align(width, 4);
338 }
339 
340 
341 /* Linear shader which implements the BLIT_RGBA shader with the
342  * additional constraints imposed by lp_setup_is_blit().
343  */
344 static bool
blit_rgba_blit(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)345 blit_rgba_blit(const struct lp_rast_state *state,
346                unsigned x, unsigned y,
347                unsigned width, unsigned height,
348                const float (*a0)[4],
349                const float (*dadx)[4],
350                const float (*dady)[4],
351                uint8_t *color,
352                unsigned stride)
353 {
354    const struct lp_jit_resources *resources = &state->jit_resources;
355    const struct lp_jit_texture *texture = &resources->textures[0];
356    const uint8_t *src;
357    unsigned src_stride;
358    int src_x, src_y;
359 
360    LP_DBG(DEBUG_RAST, "%s\n", __func__);
361 
362    /* Require w==1.0:
363     */
364    if (a0[0][3] != 1.0 ||
365        dadx[0][3] != 0.0 ||
366        dady[0][3] != 0.0)
367       return false;
368 
369    src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
370    src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
371 
372    src = texture->base;
373    src_stride = texture->row_stride[0];
374 
375    /* Fall back to blit_rgba() if clamping required:
376     */
377    if (src_x < 0 ||
378        src_y < 0 ||
379        src_x + width > texture->width ||
380        src_y + height > texture->height)
381       return false;
382 
383    util_copy_rect(color, PIPE_FORMAT_B8G8R8A8_UNORM, stride,
384                   x, y,
385                   width, height,
386                   src, src_stride,
387                   src_x, src_y);
388 
389    return true;
390 }
391 
392 
393 /* Linear shader which implements the BLIT_RGB1 shader, with the
394  * additional constraints imposed by lp_setup_is_blit().
395  */
396 static bool
blit_rgb1_blit(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)397 blit_rgb1_blit(const struct lp_rast_state *state,
398                unsigned x, unsigned y,
399                unsigned width, unsigned height,
400                const float (*a0)[4],
401                const float (*dadx)[4],
402                const float (*dady)[4],
403                uint8_t *color,
404                unsigned stride)
405 {
406    const struct lp_jit_resources *resources = &state->jit_resources;
407    const struct lp_jit_texture *texture = &resources->textures[0];
408    const uint8_t *src;
409    unsigned src_stride;
410    int src_x, src_y;
411 
412    LP_DBG(DEBUG_RAST, "%s\n", __func__);
413 
414    /* Require w==1.0:
415     */
416    if (a0[0][3] != 1.0 ||
417        dadx[0][3] != 0.0 ||
418        dady[0][3] != 0.0)
419       return false;
420 
421    color += x * 4 + y * stride;
422 
423    src_x = x + util_iround(a0[1][0]*texture->width - 0.5f);
424    src_y = y + util_iround(a0[1][1]*texture->height - 0.5f);
425 
426    src = texture->base;
427    src_stride = texture->row_stride[0];
428    src += src_x * 4;
429    src += src_y * src_stride;
430 
431    if (src_x < 0 ||
432        src_y < 0 ||
433        src_x + width > texture->width ||
434        src_y + height > texture->height)
435       return false;
436 
437    for (y = 0; y < height; y++) {
438       const uint32_t *src_row = (const uint32_t *)src;
439       uint32_t *dst_row = (uint32_t *)color;
440 
441       for (x = 0; x < width; x++) {
442          *dst_row++ = *src_row++ | 0xff000000;
443       }
444 
445       color += stride;
446       src += src_stride;
447    }
448 
449    return true;
450 }
451 
452 
453 /* Linear shader variant implementing the BLIT_RGBA shader without
454  * blending.
455  */
456 static bool
blit_rgba(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)457 blit_rgba(const struct lp_rast_state *state,
458           unsigned x, unsigned y,
459           unsigned width, unsigned height,
460           const float (*a0)[4],
461           const float (*dadx)[4],
462           const float (*dady)[4],
463           uint8_t *color,
464           unsigned stride)
465 {
466    const struct lp_jit_resources *resources = &state->jit_resources;
467    struct nearest_sampler samp;
468    struct color_blend blend;
469 
470    LP_DBG(DEBUG_RAST, "%s\n", __func__);
471 
472    if (!init_nearest_sampler(&samp,
473                              &resources->textures[0],
474                              x, y, width, height,
475                              a0[1][0], dadx[1][0], dady[1][0],
476                              a0[1][1], dadx[1][1], dady[1][1],
477                              a0[0][3], dadx[0][3], dady[0][3]))
478       return false;
479 
480    init_blend(&blend,
481               x, y, width, height,
482               color, stride);
483 
484    /* Rasterize the rectangle and run the shader:
485     */
486    for (y = 0; y < height; y++) {
487       blend.src = samp.fetch(&samp);
488       blend_noop(&blend);
489    }
490 
491    return true;
492 }
493 
494 
495 static bool
blit_rgb1(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)496 blit_rgb1(const struct lp_rast_state *state,
497           unsigned x, unsigned y,
498           unsigned width, unsigned height,
499           const float (*a0)[4],
500           const float (*dadx)[4],
501           const float (*dady)[4],
502           uint8_t *color,
503           unsigned stride)
504 {
505    const struct lp_jit_resources *resources = &state->jit_resources;
506    struct nearest_sampler samp;
507    struct color_blend blend;
508    struct shader shader;
509 
510    LP_DBG(DEBUG_RAST, "%s\n", __func__);
511 
512    if (!init_nearest_sampler(&samp,
513                              &resources->textures[0],
514                              x, y, width, height,
515                              a0[1][0], dadx[1][0], dady[1][0],
516                              a0[1][1], dadx[1][1], dady[1][1],
517                              a0[0][3], dadx[0][3], dady[0][3]))
518       return false;
519 
520    init_blend(&blend, x, y, width, height, color, stride);
521 
522    init_shader(&shader, x, y, width, height);
523 
524    /* Rasterize the rectangle and run the shader:
525     */
526    for (y = 0; y < height; y++) {
527       shader.src0 = samp.fetch(&samp);
528       blend.src = shade_rgb1(&shader);
529       blend_noop(&blend);
530    }
531 
532    return true;
533 }
534 
535 
536 /* Linear shader variant implementing the BLIT_RGBA shader with
537  * one/inv_src_alpha blending.
538  */
539 static bool
blit_rgba_blend_premul(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)540 blit_rgba_blend_premul(const struct lp_rast_state *state,
541                        unsigned x, unsigned y,
542                        unsigned width, unsigned height,
543                        const float (*a0)[4],
544                        const float (*dadx)[4],
545                        const float (*dady)[4],
546                        uint8_t *color,
547                        unsigned stride)
548 {
549    const struct lp_jit_resources *resources = &state->jit_resources;
550    struct nearest_sampler samp;
551    struct color_blend blend;
552 
553    LP_DBG(DEBUG_RAST, "%s\n", __func__);
554 
555    if (!init_nearest_sampler(&samp,
556                              &resources->textures[0],
557                              x, y, width, height,
558                              a0[1][0], dadx[1][0], dady[1][0],
559                              a0[1][1], dadx[1][1], dady[1][1],
560                              a0[0][3], dadx[0][3], dady[0][3]))
561       return false;
562 
563    init_blend(&blend, x, y, width, height, color, stride);
564 
565    /* Rasterize the rectangle and run the shader:
566     */
567    for (y = 0; y < height; y++) {
568       blend.src = samp.fetch(&samp);
569       blend_premul(&blend);
570    }
571 
572    return true;
573 }
574 
575 
576 /* Linear shader which always emits red.  Used for debugging.
577  */
578 static bool
linear_red(const struct lp_rast_state * state,unsigned x,unsigned y,unsigned width,unsigned height,const float (* a0)[4],const float (* dadx)[4],const float (* dady)[4],uint8_t * color,unsigned stride)579 linear_red(const struct lp_rast_state *state,
580            unsigned x, unsigned y,
581            unsigned width, unsigned height,
582            const float (*a0)[4],
583            const float (*dadx)[4],
584            const float (*dady)[4],
585            uint8_t *color,
586            unsigned stride)
587 {
588    union util_color uc;
589 
590    util_pack_color_ub(0xff, 0, 0, 0xff,
591                       PIPE_FORMAT_B8G8R8A8_UNORM, &uc);
592 
593    util_fill_rect(color,
594                   PIPE_FORMAT_B8G8R8A8_UNORM,
595                   stride,
596                   x,
597                   y,
598                   width,
599                   height,
600                   &uc);
601 
602    return true;
603 }
604 
605 
606 /* Noop linear shader variant, for debugging.
607  */
/* Noop linear shader variant, for debugging: claims success without
 * touching any pixels.
 */
static bool
linear_no_op(const struct lp_rast_state *state,
             unsigned x, unsigned y,
             unsigned width, unsigned height,
             const float (*a0)[4],
             const float (*dadx)[4],
             const float (*dady)[4],
             uint8_t *color,
             unsigned stride)
{
   /* Silence unused-parameter warnings; nothing to do. */
   (void)state; (void)x; (void)y; (void)width; (void)height;
   (void)a0; (void)dadx; (void)dady; (void)color; (void)stride;
   return true;
}
620 
621 
622 /* Check for ADD/ONE/INV_SRC_ALPHA, ie premultiplied-alpha blending.
623  */
624 static bool
is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant * variant)625 is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant *variant)
626 {
627    return
628       !variant->key.blend.logicop_enable &&
629       variant->key.blend.rt[0].blend_enable &&
630       variant->key.blend.rt[0].rgb_func == PIPE_BLEND_ADD &&
631       variant->key.blend.rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
632       variant->key.blend.rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
633       variant->key.blend.rt[0].alpha_func == PIPE_BLEND_ADD &&
634       variant->key.blend.rt[0].alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
635       variant->key.blend.rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
636       variant->key.blend.rt[0].colormask == 0xf;
637 }
638 
639 
640 /* Examine the fragment shader variant and determine whether we can
641  * substitute a fastpath linear shader implementation.
642  */
/* Examine the fragment shader variant and determine whether we can
 * substitute a fastpath linear shader implementation.
 *
 * On a match this sets variant->jit_linear (and, for opaque blits,
 * variant->jit_linear_blit); otherwise the fields are left untouched
 * and the generic path is used.
 */
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   /* Debug mode: replace all shading with flat red. */
   if (LP_PERF & PERF_NO_SHADE) {
      variant->jit_linear = linear_red;
      return;
   }

   /* Only sampler/texture unit 0 is considered. */
   struct lp_sampler_static_state *samp0 =
      lp_fs_variant_key_sampler_idx(&variant->key, 0);
   if (!samp0)
      return;

   enum pipe_format tex_format = samp0->texture_state.format;
   /* BLIT_RGBA: needs BGRA8 texels and nearest/clamp sampling. */
   if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA &&
       tex_format == PIPE_FORMAT_B8G8R8A8_UNORM &&
       is_nearest_clamp_sampler(samp0)) {
      if (variant->opaque) {
         /* No blending: direct copy paths. */
         variant->jit_linear_blit = blit_rgba_blit;
         variant->jit_linear = blit_rgba;
      } else if (is_one_inv_src_alpha_blend(variant) &&
                 util_get_cpu_caps()->has_sse2) {
         /* Premultiplied-alpha blend path (SSE2 required by
          * blend_premul). */
         variant->jit_linear = blit_rgba_blend_premul;
      }
      return;
   }

   /* BLIT_RGB1: alpha is forced to 1.0, so only opaque variants and
    * BGRA/BGRX textures qualify. */
   if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
       variant->opaque &&
       (tex_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
        tex_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
       is_nearest_clamp_sampler(samp0)) {
      variant->jit_linear_blit = blit_rgb1_blit;
      variant->jit_linear = blit_rgb1;
      return;
   }

   /* Debug hook: flip to 1 to route everything through the no-op
    * shader. */
   if (0) {
      variant->jit_linear = linear_no_op;
      return;
   }
}
685 #else
/* Non-SSE build: no linear fastpaths are installed, so
 * variant->jit_linear stays unset and the generic path is used. */
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   /* don't bother if there is no SSE */
}
691 #endif
692 
693