/**************************************************************************
 *
 * Copyright 2010-2021 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/


#include "util/detect.h"

#include "util/u_math.h"
#include "util/u_cpu_detect.h"
#include "util/u_pack_color.h"
#include "util/u_surface.h"
#include "util/u_sse.h"

#include "lp_jit.h"
#include "lp_rast.h"
#include "lp_debug.h"
#include "lp_state_fs.h"
#include "lp_linear_priv.h"


#if DETECT_ARCH_SSE

#include <emmintrin.h>


struct nearest_sampler {
   alignas(16) uint32_t out[64];

   const struct lp_jit_texture *texture;
   float fsrc_x;                /* src_x0 */
   float fsrc_y;                /* src_y0 */
   float fdsdx;                 /* ds/dx */
   float fdsdy;                 /* ds/dy */
   float fdtdx;                 /* dt/dx */
   float fdtdy;                 /* dt/dy */
   int width;
   int y;

   const uint32_t *(*fetch)(struct nearest_sampler *samp);
};


/* Organize all the information needed for blending in one place.
 * Could have a blend function pointer here, but we currently always
 * know which one we want to call.
 */
struct color_blend {
   const uint32_t *src;
   uint8_t *color;
   int stride;
   int width;                   /* the exact width */
};


/* Organize all the information needed for running each of the shaders
 * in one place.
 */
struct shader {
   alignas(16) uint32_t out0[64];
   const uint32_t *src0;
   const uint32_t *src1;
   __m128i const0;
   int width;                   /* rounded up to multiple of 4 */
};


/* For a row of pixels, perform add/one/inv_src_alpha (i.e.
 * premultiplied alpha) blending between the incoming pixels and the
 * destination buffer.
 *
 * This is used to implement the BLIT_RGBA + blend shader. There are
 * no operations from the pixel shader left to implement at this
 * level: effectively the pixel shader was just a texture fetch which
 * has already been performed. This routine therefore purely
 * implements blending.
 */
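/* For reference, a scalar sketch of the per-channel math that
 * util_sse2_blend_premul_4() performs four pixels at a time. This is
 * an illustration only; the SSE2 helper's exact rounding may differ:
 *
 *    result.rgba = src.rgba + dst.rgba * (255 - src.a) / 255
 */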
static void
blend_premul(struct color_blend *blend)
{
   const uint32_t *src = blend->src;           /* aligned */
   uint32_t *dst = (uint32_t *)blend->color;   /* unaligned */
   const int width = blend->width;
   int i;
   union { __m128i m128; uint ui[4]; } dstreg;

   blend->color += blend->stride;

   for (i = 0; i + 3 < width; i += 4) {
      __m128i tmp;
      tmp = _mm_loadu_si128((const __m128i *)&dst[i]);   /* UNALIGNED READ */
      dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
                                             tmp);
      _mm_storeu_si128((__m128i *)&dst[i], dstreg.m128); /* UNALIGNED WRITE */
   }

   if (i < width) {
      int j;
      for (j = 0; j < width - i; j++) {
         dstreg.ui[j] = dst[i+j];
      }
      dstreg.m128 = util_sse2_blend_premul_4(*(const __m128i *)&src[i],
                                             dstreg.m128);
      for (; i < width; i++)
         dst[i] = dstreg.ui[i&3];
   }
}


static void
blend_noop(struct color_blend *blend)
{
   memcpy(blend->color, blend->src, blend->width * sizeof(unsigned));
   blend->color += blend->stride;
}


static void
init_blend(struct color_blend *blend,
           int x, int y, int width, int height,
           uint8_t *color,
           int stride)
{
   blend->color = color + x * 4 + y * stride;
   blend->stride = stride;
   blend->width = width;
}


/*
 * Perform nearest filtered lookup of a row of texels. Texture lookup
 * is assumed to be axis aligned but with arbitrary scaling.
 *
 * Texture coordinate interpolation is performed in 24.8 fixed point.
 * Note that the longest span we will encounter is 64 pixels long,
 * meaning that 8 fractional bits is more than sufficient to represent
 * the shallowest gradient possible within this span.
 *
 * After 64 pixels (i.e. in the next tile), the starting point will be
 * recalculated with floating point arithmetic.
 *
 * XXX: migrate this to use Jose's quad blitter texture fetch routines.
 */
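/* Worked example of the claim above: the shallowest gradient over a
 * 64-pixel span is 1/64 texel per pixel, which is 256/64 = 4 in 24.8
 * units and therefore exactly representable. Truncating the step to
 * an integer loses less than 1/256 texel per pixel, accumulating to
 * under 1/4 texel across the span.
 */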
static const uint32_t *
fetch_row(struct nearest_sampler *samp)
{
   const int y = samp->y++;
   uint32_t *row = samp->out;
   const struct lp_jit_texture *texture = samp->texture;
   const int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
   const uint32_t *src_row =
      (const uint32_t *)((const uint8_t *)texture->base +
                         yy * texture->row_stride[0]);
   const int iscale_x = samp->fdsdx * 256;
   const int width = samp->width;
   int acc = samp->fsrc_x * 256 + 128;

   for (int i = 0; i < width; i++) {
      row[i] = src_row[acc>>8];
      acc += iscale_x;
   }

   return row;
}


/* Version of fetch_row which can cope with texture edges. In
 * practice, Aero never triggers this.
 */
static const uint32_t *
fetch_row_clamped(struct nearest_sampler *samp)
{
   const int y = samp->y++;
   uint32_t *row = samp->out;
   const struct lp_jit_texture *texture = samp->texture;
   const int yy = util_iround(samp->fsrc_y + samp->fdtdy * y);
   const uint32_t *src_row =
      (const uint32_t *)((const uint8_t *)texture->base +
                         CLAMP(yy, 0, texture->height - 1) *
                         texture->row_stride[0]);
   const float src_x0 = samp->fsrc_x;
   const float scale_x = samp->fdsdx;
   const int width = samp->width;

   for (int i = 0; i < width; i++) {
      row[i] = src_row[CLAMP(util_iround(src_x0 + i * scale_x),
                             0, texture->width - 1)];
   }

   return row;
}

/* It very rarely happens that some non-axis-aligned texturing creeps
 * into the linear path. Handle it here. The alternative would be
 * more pre-checking, or an option to fall back by returning false
 * from jit_linear.
 */
static const uint32_t *
fetch_row_xy_clamped(struct nearest_sampler *samp)
{
   const int y = samp->y++;
   uint32_t *row = samp->out;
   const struct lp_jit_texture *texture = samp->texture;
   const float yrow = samp->fsrc_y + samp->fdtdy * y;
   const float xrow = samp->fsrc_x + samp->fdsdy * y;
   const int width = samp->width;

   for (int i = 0; i < width; i++) {
      int yy = util_iround(yrow + samp->fdtdx * i);
      int xx = util_iround(xrow + samp->fdsdx * i);

      const uint32_t *src_row =
         (const uint32_t *)((const uint8_t *)texture->base +
                            CLAMP(yy, 0, texture->height - 1) *
                            texture->row_stride[0]);

      row[i] = src_row[CLAMP(xx, 0, texture->width - 1)];
   }

   return row;
}


static bool
init_nearest_sampler(struct nearest_sampler *samp,
                     const struct lp_jit_texture *texture,
                     int x0, int y0,
                     int width, int height,
                     float s0, float dsdx, float dsdy,
                     float t0, float dtdx, float dtdy,
                     float w0, float dwdx, float dwdy)
{
   const float oow = 1.0f / w0;

   if (dwdx != 0.0 || dwdy != 0.0)
      return false;

   samp->texture = texture;
   samp->width = width;
   samp->fdsdx = dsdx * texture->width * oow;
   samp->fdsdy = dsdy * texture->width * oow;
   samp->fdtdx = dtdx * texture->height * oow;
   samp->fdtdy = dtdy * texture->height * oow;
   samp->fsrc_x = (samp->fdsdx * x0 +
                   samp->fdsdy * y0 +
                   s0 * texture->width * oow - 0.5f);

   samp->fsrc_y = (samp->fdtdx * x0 +
                   samp->fdtdy * y0 +
                   t0 * texture->height * oow - 0.5f);
   samp->y = 0;

   /* Because we want to permit consumers of this data to round up to
    * the next multiple of 4, and because we don't want valgrind to
    * complain about uninitialized reads, zero out the tail of the
    * buffer:
    */
   for (int i = width; i & 3; i++)
      samp->out[i] = 0;
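   /* For example, with width == 5 the loop above zeroes out[5..7], so
    * a consumer reading a whole group of four values (e.g.
    * shade_rgb1()) only ever sees initialized data.
    */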

   if (dsdy != 0 || dtdx != 0) {
      /* Arbitrary texture lookup:
       */
      samp->fetch = fetch_row_xy_clamped;
   } else {
      /* Axis-aligned stretch blit, arbitrary scaling factors including
       * flipped, minifying and magnifying:
       */
      int isrc_x = util_iround(samp->fsrc_x);
      int isrc_y = util_iround(samp->fsrc_y);
      int isrc_x1 = util_iround(samp->fsrc_x + width * samp->fdsdx);
      int isrc_y1 = util_iround(samp->fsrc_y + height * samp->fdtdy);

      /* Look at the maximum and minimum texture coordinates we will be
       * fetching and figure out if we need to use clamping. There is
       * similar code in u_blit_sw.c which takes a better approach to
       * this, and which could be substituted later.
       */
      if (isrc_x <= texture->width && isrc_x >= 0 &&
          isrc_y <= texture->height && isrc_y >= 0 &&
          isrc_x1 <= texture->width && isrc_x1 >= 0 &&
          isrc_y1 <= texture->height && isrc_y1 >= 0) {
         samp->fetch = fetch_row;
      } else {
         samp->fetch = fetch_row_clamped;
      }
   }

   return true;
}


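/* Run the BLIT_RGB1 "shader" on a fetched row: pass RGB through
 * unchanged and force the alpha byte to 0xff (alpha = 1.0 in
 * B8G8R8A8), four pixels per iteration.
 */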
static const uint32_t *
shade_rgb1(struct shader *shader)
{
   const __m128i rgb1 = _mm_set1_epi32(0xff000000);
   const uint32_t *src0 = shader->src0;
   uint32_t *dst = shader->out0;
   int width = shader->width;
   int i;

   for (i = 0; i + 3 < width; i += 4) {
      __m128i s = *(const __m128i *)&src0[i];
      *(__m128i *)&dst[i] = _mm_or_si128(s, rgb1);
   }

   return shader->out0;
}


static void
init_shader(struct shader *shader,
            int x, int y, int width, int height)
{
   shader->width = align(width, 4);
}


/* Linear shader which implements the BLIT_RGBA shader with the
 * additional constraints imposed by lp_setup_is_blit().
 */
static bool
blit_rgba_blit(const struct lp_rast_state *state,
               unsigned x, unsigned y,
               unsigned width, unsigned height,
               const float (*a0)[4],
               const float (*dadx)[4],
               const float (*dady)[4],
               uint8_t *color,
               unsigned stride)
{
   const struct lp_jit_resources *resources = &state->jit_resources;
   const struct lp_jit_texture *texture = &resources->textures[0];
   const uint8_t *src;
   unsigned src_stride;
   int src_x, src_y;

   LP_DBG(DEBUG_RAST, "%s\n", __func__);

   /* Require w == 1.0:
    */
   if (a0[0][3] != 1.0 ||
       dadx[0][3] != 0.0 ||
       dady[0][3] != 0.0)
      return false;

   src_x = x + util_iround(a0[1][0] * texture->width - 0.5f);
   src_y = y + util_iround(a0[1][1] * texture->height - 0.5f);

   src = texture->base;
   src_stride = texture->row_stride[0];

   /* Fall back to blit_rgba() if clamping is required:
    */
   if (src_x < 0 ||
       src_y < 0 ||
       src_x + width > texture->width ||
       src_y + height > texture->height)
      return false;

   util_copy_rect(color, PIPE_FORMAT_B8G8R8A8_UNORM, stride,
                  x, y,
                  width, height,
                  src, src_stride,
                  src_x, src_y);

   return true;
}


/* Linear shader which implements the BLIT_RGB1 shader, with the
 * additional constraints imposed by lp_setup_is_blit().
 */
static bool
blit_rgb1_blit(const struct lp_rast_state *state,
               unsigned x, unsigned y,
               unsigned width, unsigned height,
               const float (*a0)[4],
               const float (*dadx)[4],
               const float (*dady)[4],
               uint8_t *color,
               unsigned stride)
{
   const struct lp_jit_resources *resources = &state->jit_resources;
   const struct lp_jit_texture *texture = &resources->textures[0];
   const uint8_t *src;
   unsigned src_stride;
   int src_x, src_y;

   LP_DBG(DEBUG_RAST, "%s\n", __func__);

   /* Require w == 1.0:
    */
   if (a0[0][3] != 1.0 ||
       dadx[0][3] != 0.0 ||
       dady[0][3] != 0.0)
      return false;

   color += x * 4 + y * stride;

   src_x = x + util_iround(a0[1][0] * texture->width - 0.5f);
   src_y = y + util_iround(a0[1][1] * texture->height - 0.5f);

   src = texture->base;
   src_stride = texture->row_stride[0];
   src += src_x * 4;
   src += src_y * src_stride;

   if (src_x < 0 ||
       src_y < 0 ||
       src_x + width > texture->width ||
       src_y + height > texture->height)
      return false;

   for (y = 0; y < height; y++) {
      const uint32_t *src_row = (const uint32_t *)src;
      uint32_t *dst_row = (uint32_t *)color;

      for (x = 0; x < width; x++) {
         *dst_row++ = *src_row++ | 0xff000000;
      }

      color += stride;
      src += src_stride;
   }

   return true;
}


/* Linear shader variant implementing the BLIT_RGBA shader without
 * blending.
 */
static bool
blit_rgba(const struct lp_rast_state *state,
          unsigned x, unsigned y,
          unsigned width, unsigned height,
          const float (*a0)[4],
          const float (*dadx)[4],
          const float (*dady)[4],
          uint8_t *color,
          unsigned stride)
{
   const struct lp_jit_resources *resources = &state->jit_resources;
   struct nearest_sampler samp;
   struct color_blend blend;

   LP_DBG(DEBUG_RAST, "%s\n", __func__);

   if (!init_nearest_sampler(&samp,
                             &resources->textures[0],
                             x, y, width, height,
                             a0[1][0], dadx[1][0], dady[1][0],
                             a0[1][1], dadx[1][1], dady[1][1],
                             a0[0][3], dadx[0][3], dady[0][3]))
      return false;

   init_blend(&blend,
              x, y, width, height,
              color, stride);

   /* Rasterize the rectangle and run the shader:
    */
   for (y = 0; y < height; y++) {
      blend.src = samp.fetch(&samp);
      blend_noop(&blend);
   }

   return true;
}


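/* Linear shader variant implementing the BLIT_RGB1 shader without
 * blending.
 */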
static bool
blit_rgb1(const struct lp_rast_state *state,
          unsigned x, unsigned y,
          unsigned width, unsigned height,
          const float (*a0)[4],
          const float (*dadx)[4],
          const float (*dady)[4],
          uint8_t *color,
          unsigned stride)
{
   const struct lp_jit_resources *resources = &state->jit_resources;
   struct nearest_sampler samp;
   struct color_blend blend;
   struct shader shader;

   LP_DBG(DEBUG_RAST, "%s\n", __func__);

   if (!init_nearest_sampler(&samp,
                             &resources->textures[0],
                             x, y, width, height,
                             a0[1][0], dadx[1][0], dady[1][0],
                             a0[1][1], dadx[1][1], dady[1][1],
                             a0[0][3], dadx[0][3], dady[0][3]))
      return false;

   init_blend(&blend, x, y, width, height, color, stride);

   init_shader(&shader, x, y, width, height);

   /* Rasterize the rectangle and run the shader:
    */
   for (y = 0; y < height; y++) {
      shader.src0 = samp.fetch(&samp);
      blend.src = shade_rgb1(&shader);
      blend_noop(&blend);
   }

   return true;
}


/* Linear shader variant implementing the BLIT_RGBA shader with
 * one/inv_src_alpha blending.
 */
static bool
blit_rgba_blend_premul(const struct lp_rast_state *state,
                       unsigned x, unsigned y,
                       unsigned width, unsigned height,
                       const float (*a0)[4],
                       const float (*dadx)[4],
                       const float (*dady)[4],
                       uint8_t *color,
                       unsigned stride)
{
   const struct lp_jit_resources *resources = &state->jit_resources;
   struct nearest_sampler samp;
   struct color_blend blend;

   LP_DBG(DEBUG_RAST, "%s\n", __func__);

   if (!init_nearest_sampler(&samp,
                             &resources->textures[0],
                             x, y, width, height,
                             a0[1][0], dadx[1][0], dady[1][0],
                             a0[1][1], dadx[1][1], dady[1][1],
                             a0[0][3], dadx[0][3], dady[0][3]))
      return false;

   init_blend(&blend, x, y, width, height, color, stride);

   /* Rasterize the rectangle and run the shader:
    */
   for (y = 0; y < height; y++) {
      blend.src = samp.fetch(&samp);
      blend_premul(&blend);
   }

   return true;
}


/* Linear shader which always emits red. Used for debugging.
 */
static bool
linear_red(const struct lp_rast_state *state,
           unsigned x, unsigned y,
           unsigned width, unsigned height,
           const float (*a0)[4],
           const float (*dadx)[4],
           const float (*dady)[4],
           uint8_t *color,
           unsigned stride)
{
   union util_color uc;

   util_pack_color_ub(0xff, 0, 0, 0xff,
                      PIPE_FORMAT_B8G8R8A8_UNORM, &uc);

   util_fill_rect(color,
                  PIPE_FORMAT_B8G8R8A8_UNORM,
                  stride,
                  x,
                  y,
                  width,
                  height,
                  &uc);

   return true;
}


/* No-op linear shader variant, for debugging.
 */
static bool
linear_no_op(const struct lp_rast_state *state,
             unsigned x, unsigned y,
             unsigned width, unsigned height,
             const float (*a0)[4],
             const float (*dadx)[4],
             const float (*dady)[4],
             uint8_t *color,
             unsigned stride)
{
   return true;
}


/* Check for ADD/ONE/INV_SRC_ALPHA, i.e. premultiplied-alpha blending.
 */
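/* With those factors the blend equation reduces to
 *
 *    result = src + dst * (1 - src.alpha)
 *
 * which is what blend_premul() above implements.
 */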
static bool
is_one_inv_src_alpha_blend(const struct lp_fragment_shader_variant *variant)
{
   return
      !variant->key.blend.logicop_enable &&
      variant->key.blend.rt[0].blend_enable &&
      variant->key.blend.rt[0].rgb_func == PIPE_BLEND_ADD &&
      variant->key.blend.rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
      variant->key.blend.rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
      variant->key.blend.rt[0].alpha_func == PIPE_BLEND_ADD &&
      variant->key.blend.rt[0].alpha_src_factor == PIPE_BLENDFACTOR_ONE &&
      variant->key.blend.rt[0].alpha_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA &&
      variant->key.blend.rt[0].colormask == 0xf;
}


/* Examine the fragment shader variant and determine whether we can
 * substitute a fastpath linear shader implementation.
 */
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   if (LP_PERF & PERF_NO_SHADE) {
      variant->jit_linear = linear_red;
      return;
   }

   struct lp_sampler_static_state *samp0 =
      lp_fs_variant_key_sampler_idx(&variant->key, 0);
   if (!samp0)
      return;

   enum pipe_format tex_format = samp0->texture_state.format;
   if (variant->shader->kind == LP_FS_KIND_BLIT_RGBA &&
       tex_format == PIPE_FORMAT_B8G8R8A8_UNORM &&
       is_nearest_clamp_sampler(samp0)) {
      if (variant->opaque) {
         variant->jit_linear_blit = blit_rgba_blit;
         variant->jit_linear = blit_rgba;
      } else if (is_one_inv_src_alpha_blend(variant) &&
                 util_get_cpu_caps()->has_sse2) {
         variant->jit_linear = blit_rgba_blend_premul;
      }
      return;
   }

   if (variant->shader->kind == LP_FS_KIND_BLIT_RGB1 &&
       variant->opaque &&
       (tex_format == PIPE_FORMAT_B8G8R8A8_UNORM ||
        tex_format == PIPE_FORMAT_B8G8R8X8_UNORM) &&
       is_nearest_clamp_sampler(samp0)) {
      variant->jit_linear_blit = blit_rgb1_blit;
      variant->jit_linear = blit_rgb1;
      return;
   }

   if (0) {
      variant->jit_linear = linear_no_op;
      return;
   }
}
#else
void
llvmpipe_fs_variant_linear_fastpath(struct lp_fragment_shader_variant *variant)
{
   /* don't bother if there is no SSE */
}
#endif
