xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/llvmpipe/lp_state_fs_fastpath.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
/**************************************************************************
 *
 * Copyright 2010-2021 VMware, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 **************************************************************************/
27 
28 
29 #include "util/detect.h"
30 
31 #include "util/u_math.h"
32 #include "util/u_cpu_detect.h"
33 #include "util/u_sse.h"
34 
35 #include "lp_jit.h"
36 #include "lp_state_fs.h"
37 #include "lp_debug.h"
38 
39 
40 #if DETECT_ARCH_SSE
41 
42 #include <emmintrin.h>
43 
44 
/**
 * Fragment-shader stand-in that does nothing (for debugging).
 *
 * Takes the same parameter list as the red() and green() debug shaders in
 * this file, so it can be stored in the same jit_function slots.  No
 * argument is read and no buffer is written.
 */
static void
no_op(const struct lp_jit_context *context,
      const struct lp_jit_resources *resources,
      uint32_t x,
      uint32_t y,
      uint32_t facing,
      const void *a0,
      const void *dadx,
      const void *dady,
      uint8_t **cbufs,
      uint8_t *depth,
      uint64_t mask,
      struct lp_jit_thread_data *thread_data,
      unsigned *strides,
      unsigned depth_stride,
      unsigned *color_sample_stride,
      unsigned depth_sample_stride)
{
   /* Every parameter is intentionally unused. */
   (void)context;
   (void)resources;
   (void)x;
   (void)y;
   (void)facing;
   (void)a0;
   (void)dadx;
   (void)dady;
   (void)cbufs;
   (void)depth;
   (void)mask;
   (void)thread_data;
   (void)strides;
   (void)depth_stride;
   (void)color_sample_stride;
   (void)depth_sample_stride;
}
64 
65 
/*
 * m ? a : b
 *
 * Bitwise select: every result bit is taken from a where the matching bit
 * of m is 1 and from b where it is 0.  The mask is applied bit by bit, so
 * m does not have to be all-ones/all-zeros per lane.
 */
static inline __m128i
mm_select_si128(__m128i m, __m128i a, __m128i b)
{
   /*
    * TODO: use PBLENDVB when available.
    */
   const __m128i from_a = _mm_and_si128(m, a);
   const __m128i from_b = _mm_andnot_si128(m, b);   /* (~m) & b */

   return _mm_or_si128(from_a, from_b);
}
83 
84 
85 /*
86  * *p = m ? a : *p;
87  */
88 static inline void
mm_store_mask_si128(__m128i * p,__m128i m,__m128i a)89 mm_store_mask_si128(__m128i *p, __m128i m, __m128i a)
90 {
91    _mm_store_si128(p, mm_select_si128(m, a, _mm_load_si128(p)));
92 }
93 
94 
/**
 * Expand the mask from a 16 bit integer to a 4 x 4 x 32 bit vector mask, ie.
 * 1 bit -> 32bits.
 *
 * Bit i (0..15) of int_mask controls 32-bit lane (i % 4) of vec_mask[i / 4]:
 * the lane becomes 0xffffffff when the bit is set and 0 when it is clear.
 * Bits 16 and above of int_mask are ignored.
 *
 * \param int_mask  coverage bits; only the low 16 are used
 * \param vec_mask  output array of four vectors
 */
static inline void
expand_mask(uint32_t int_mask,
            __m128i *vec_mask)
{
   const __m128i zero = _mm_setzero_si128();
   /* One distinct bit per lane: bits 0-3 and bits 4-7 of a byte. */
   const __m128i lane_bits_lo = _mm_setr_epi32(0x0001, 0x0002, 0x0004, 0x0008);
   const __m128i lane_bits_hi = _mm_setr_epi32(0x0010, 0x0020, 0x0040, 0x0080);

   /*
    * Work on the inverted mask so that a set bit in int_mask isolates to
    * zero; the compare-with-zero below then yields all-ones for exactly
    * those lanes.
    */
   __m128i inv_mask = _mm_set1_epi32(~int_mask & 0xffff);

   /* Low byte: bits 0..7. */
   vec_mask[0] = _mm_and_si128(inv_mask, lane_bits_lo);
   vec_mask[1] = _mm_and_si128(inv_mask, lane_bits_hi);

   /* High byte: shift bits 8..15 down and reuse the same lane constants. */
   inv_mask = _mm_srli_epi32(inv_mask, 8);
   vec_mask[2] = _mm_and_si128(inv_mask, lane_bits_lo);
   vec_mask[3] = _mm_and_si128(inv_mask, lane_bits_hi);

   vec_mask[0] = _mm_cmpeq_epi32(vec_mask[0], zero);
   vec_mask[1] = _mm_cmpeq_epi32(vec_mask[1], zero);
   vec_mask[2] = _mm_cmpeq_epi32(vec_mask[2], zero);
   vec_mask[3] = _mm_cmpeq_epi32(vec_mask[3], zero);
}
117 
118 
/**
 * Draw opaque color (for debugging).
 *
 * Writes the 32-bit value color into the pixels selected by int_mask in a
 * block of four rows of four 32-bit pixels starting at cbufs[0].
 * Consecutive rows are strides[0] bytes apart.  Pixels whose mask bit is
 * clear keep their previous contents.
 *
 * Only cbufs[0] and strides[0] are accessed.  Rows are read and written
 * through mm_store_mask_si128(), which uses aligned 128-bit accesses.
 * NOTE(review): this presumes cbufs[0] is 16-byte aligned and strides[0] is
 * a multiple of 16 -- confirm against the caller.
 *
 * \param cbufs     color buffer pointers; only element 0 is used
 * \param strides   row strides in bytes; only element 0 is used
 * \param int_mask  coverage bits, expanded by expand_mask()
 * \param color     32-bit pixel value to store
 */
static void
opaque_color(uint8_t **cbufs, unsigned *strides,
             uint32_t int_mask,
             uint32_t color)
{
   __m128i *row = (__m128i *)cbufs[0];
   /* Row pitch counted in 128-bit vectors rather than bytes. */
   const unsigned stride = strides[0] / sizeof *row;
   const __m128i vec_color = _mm_set1_epi32((int)color);
   __m128i vec_mask[4];

   expand_mask(int_mask, vec_mask);

   for (unsigned i = 0; i < 4; i++) {
      mm_store_mask_si128(row + i * stride, vec_mask[i], vec_color);
   }
}
139 
140 
/**
 * Draw opaque red (for debugging).
 *
 * Debug replacement for a fragment shader: fills the covered pixels of the
 * block at cbufs[0] with the constant 0xffff0000 via opaque_color().
 * Only cbufs, strides and int_mask are used; int_mask is narrowed to 32
 * bits for opaque_color().  All other parameters are ignored.
 */
static void
red(const struct lp_jit_context *context,
    const struct lp_jit_resources *resources,
    uint32_t x,
    uint32_t y,
    uint32_t facing,
    const void *a0,
    const void *dadx,
    const void *dady,
    uint8_t **cbufs,
    uint8_t *depth,
    uint64_t int_mask,
    struct lp_jit_thread_data *thread_data,
    unsigned *strides,
    unsigned depth_stride,
    unsigned *sample_stride,
    unsigned depth_sample_stride)
{
   /* Explicit narrowing: opaque_color() takes a 32-bit mask. */
   opaque_color(cbufs, strides, (uint32_t)int_mask, 0xffff0000);

   (void)context;
   (void)resources;
   (void)x;
   (void)y;
   (void)facing;
   (void)a0;
   (void)dadx;
   (void)dady;
   (void)depth;
   (void)thread_data;
   (void)depth_stride;
   (void)sample_stride;
   (void)depth_sample_stride;
}
167 
168 
/**
 * Draw opaque green (for debugging).
 *
 * Debug replacement for a fragment shader: fills the covered pixels of the
 * block at cbufs[0] with the constant 0xff00ff00 via opaque_color().
 * Only cbufs, strides and int_mask are used; int_mask is narrowed to 32
 * bits for opaque_color().  All other parameters are ignored.
 */
static void
green(const struct lp_jit_context *context,
      const struct lp_jit_resources *resources,
      uint32_t x,
      uint32_t y,
      uint32_t facing,
      const void *a0,
      const void *dadx,
      const void *dady,
      uint8_t **cbufs,
      uint8_t *depth,
      uint64_t int_mask,
      struct lp_jit_thread_data *thread_data,
      unsigned *strides,
      unsigned depth_stride,
      unsigned *sample_stride,
      unsigned depth_sample_stride)
{
   /* Explicit narrowing: opaque_color() takes a 32-bit mask. */
   opaque_color(cbufs, strides, (uint32_t)int_mask, 0xff00ff00);

   (void)context;
   (void)resources;
   (void)x;
   (void)y;
   (void)facing;
   (void)a0;
   (void)dadx;
   (void)dady;
   (void)depth;
   (void)thread_data;
   (void)depth_stride;
   (void)sample_stride;
   (void)depth_sample_stride;
}
195 
196 
197 void
llvmpipe_fs_variant_fastpath(struct lp_fragment_shader_variant * variant)198 llvmpipe_fs_variant_fastpath(struct lp_fragment_shader_variant *variant)
199 {
200    variant->jit_function[RAST_WHOLE]     = NULL;
201    variant->jit_function[RAST_EDGE_TEST] = NULL;
202 
203    if (LP_DEBUG & DEBUG_NO_FASTPATH)
204       return;
205 
206    if (variant->key.cbuf_format[0] != PIPE_FORMAT_B8G8R8A8_UNORM &&
207        variant->key.cbuf_format[0] != PIPE_FORMAT_B8G8R8X8_UNORM) {
208       return;
209    }
210 
211    if (0) {
212       variant->jit_function[RAST_WHOLE]     = red;
213       variant->jit_function[RAST_EDGE_TEST] = red;
214    }
215 
216    if (0) {
217       variant->jit_function[RAST_WHOLE]     = green;
218       variant->jit_function[RAST_EDGE_TEST] = green;
219    }
220 
221    if (0) {
222       variant->jit_function[RAST_WHOLE]     = no_op;
223       variant->jit_function[RAST_EDGE_TEST] = no_op;
224    }
225 
226    /* Make it easier to see triangles:
227     */
228    if ((LP_DEBUG & DEBUG_LINEAR) || (LP_PERF & PERF_NO_SHADE)) {
229       variant->jit_function[RAST_EDGE_TEST] = red;
230       variant->jit_function[RAST_WHOLE] = green;
231    }
232 }
233 
234 #else
235 
/**
 * Build without SSE support: no fast paths exist, so this does nothing.
 *
 * NOTE(review): unlike the SSE build, this variant does not reset
 * variant->jit_function[RAST_WHOLE] / [RAST_EDGE_TEST] to NULL; presumably
 * the caller hands in an already zero-initialized variant -- confirm.
 */
void
llvmpipe_fs_variant_fastpath(struct lp_fragment_shader_variant *variant)
{
   (void)variant;
}
240 
241 #endif
242