1 /**************************************************************************
2 *
3 * Copyright 2007-2008 VMware, Inc.
4 * All Rights Reserved.
5 * Copyright 2009-2010 VMware, Inc. All rights Reserved.
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the
9 * "Software"), to deal in the Software without restriction, including
10 * without limitation the rights to use, copy, modify, merge, publish,
11 * distribute, sub license, and/or sell copies of the Software, and to
12 * permit persons to whom the Software is furnished to do so, subject to
13 * the following conditions:
14 *
15 * The above copyright notice and this permission notice (including the
16 * next paragraph) shall be included in all copies or substantial portions
17 * of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
22 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
23 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
24 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
25 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 *
27 **************************************************************************/
28
29 /**
30 * TGSI interpreter/executor.
31 *
32 * Flow control information:
33 *
34 * Since we operate on 'quads' (4 pixels or 4 vertices in parallel)
35 * flow control statements (IF/ELSE/ENDIF, LOOP/ENDLOOP) require special
36 * care since a condition may be true for some quad components but false
37 * for other components.
38 *
39 * We basically execute all statements (even if they're in the part of
40 * an IF/ELSE clause that's "not taken") and use a special mask to
41 * control writing to destination registers. This is the ExecMask.
42 * See store_dest().
43 *
 * The ExecMask is computed from three other masks (CondMask, LoopMask and
 * ContMask) which are controlled by the flow control instructions (namely:
 * IF/ELSE/ENDIF, LOOP/ENDLOOP and CONT).
47 *
48 *
49 * Authors:
50 * Michal Krol
51 * Brian Paul
52 */
53
54 #include "util/compiler.h"
55 #include "pipe/p_state.h"
56 #include "pipe/p_shader_tokens.h"
57 #include "tgsi/tgsi_dump.h"
58 #include "tgsi/tgsi_parse.h"
59 #include "tgsi/tgsi_util.h"
60 #include "tgsi_exec.h"
61 #include "util/compiler.h"
62 #include "util/half_float.h"
63 #include "util/u_memory.h"
64 #include "util/u_math.h"
65 #include "util/rounding.h"
66
67
/* NOTE(review): presumably a compile-time switch for execution tracing;
 * its use is not visible in this part of the file -- confirm. */
#define DEBUG_EXECUTION 0


/* Lane indices within a quad, named after the lane's position in the 2x2
 * tile.  The micro_ddx/micro_ddy helpers use them to index a channel. */
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
#define TILE_BOTTOM_RIGHT 3

/* The executor's channel, vector and machine types must be 16-byte aligned. */
static_assert(alignof(union tgsi_exec_channel) == 16, "");
static_assert(alignof(struct tgsi_exec_vector) == 16, "");
static_assert(alignof(struct tgsi_exec_machine) == 16, "");
79
/**
 * One 64-bit channel for a whole quad: TGSI_QUAD_SIZE lanes, each viewable
 * as a double, as a pair of 32-bit unsigned words, or as an unsigned/signed
 * 64-bit integer.  The storage is 16-byte aligned.
 */
union tgsi_double_channel {
   alignas(16)
   double d[TGSI_QUAD_SIZE];        /* floating-point view */
   unsigned u[TGSI_QUAD_SIZE][2];   /* two 32-bit words per lane */
   uint64_t u64[TGSI_QUAD_SIZE];    /* unsigned 64-bit view */
   int64_t i64[TGSI_QUAD_SIZE];     /* signed 64-bit view */
};
87
/**
 * A pair of 64-bit channels, named xy and zw.
 * The structure is 16-byte aligned.
 */
struct tgsi_double_vector {
   alignas(16)
   union tgsi_double_channel xy;
   union tgsi_double_channel zw;
};
93
/* The 64-bit channel and vector types must be 16-byte aligned as well. */
static_assert(alignof(union tgsi_double_channel) == 16, "");
static_assert(alignof(struct tgsi_double_vector) == 16, "");
96
97 static void
micro_abs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)98 micro_abs(union tgsi_exec_channel *dst,
99 const union tgsi_exec_channel *src)
100 {
101 dst->f[0] = fabsf(src->f[0]);
102 dst->f[1] = fabsf(src->f[1]);
103 dst->f[2] = fabsf(src->f[2]);
104 dst->f[3] = fabsf(src->f[3]);
105 }
106
107 static void
micro_arl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)108 micro_arl(union tgsi_exec_channel *dst,
109 const union tgsi_exec_channel *src)
110 {
111 dst->i[0] = (int)floorf(src->f[0]);
112 dst->i[1] = (int)floorf(src->f[1]);
113 dst->i[2] = (int)floorf(src->f[2]);
114 dst->i[3] = (int)floorf(src->f[3]);
115 }
116
117 static void
micro_arr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)118 micro_arr(union tgsi_exec_channel *dst,
119 const union tgsi_exec_channel *src)
120 {
121 dst->i[0] = (int)floorf(src->f[0] + 0.5f);
122 dst->i[1] = (int)floorf(src->f[1] + 0.5f);
123 dst->i[2] = (int)floorf(src->f[2] + 0.5f);
124 dst->i[3] = (int)floorf(src->f[3] + 0.5f);
125 }
126
127 static void
micro_ceil(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)128 micro_ceil(union tgsi_exec_channel *dst,
129 const union tgsi_exec_channel *src)
130 {
131 dst->f[0] = ceilf(src->f[0]);
132 dst->f[1] = ceilf(src->f[1]);
133 dst->f[2] = ceilf(src->f[2]);
134 dst->f[3] = ceilf(src->f[3]);
135 }
136
137 static void
micro_cmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)138 micro_cmp(union tgsi_exec_channel *dst,
139 const union tgsi_exec_channel *src0,
140 const union tgsi_exec_channel *src1,
141 const union tgsi_exec_channel *src2)
142 {
143 dst->f[0] = src0->f[0] < 0.0f ? src1->f[0] : src2->f[0];
144 dst->f[1] = src0->f[1] < 0.0f ? src1->f[1] : src2->f[1];
145 dst->f[2] = src0->f[2] < 0.0f ? src1->f[2] : src2->f[2];
146 dst->f[3] = src0->f[3] < 0.0f ? src1->f[3] : src2->f[3];
147 }
148
149 static void
micro_cos(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)150 micro_cos(union tgsi_exec_channel *dst,
151 const union tgsi_exec_channel *src)
152 {
153 dst->f[0] = cosf(src->f[0]);
154 dst->f[1] = cosf(src->f[1]);
155 dst->f[2] = cosf(src->f[2]);
156 dst->f[3] = cosf(src->f[3]);
157 }
158
159 static void
micro_d2f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)160 micro_d2f(union tgsi_exec_channel *dst,
161 const union tgsi_double_channel *src)
162 {
163 dst->f[0] = (float)src->d[0];
164 dst->f[1] = (float)src->d[1];
165 dst->f[2] = (float)src->d[2];
166 dst->f[3] = (float)src->d[3];
167 }
168
169 static void
micro_d2i(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)170 micro_d2i(union tgsi_exec_channel *dst,
171 const union tgsi_double_channel *src)
172 {
173 dst->i[0] = (int)src->d[0];
174 dst->i[1] = (int)src->d[1];
175 dst->i[2] = (int)src->d[2];
176 dst->i[3] = (int)src->d[3];
177 }
178
179 static void
micro_d2u(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)180 micro_d2u(union tgsi_exec_channel *dst,
181 const union tgsi_double_channel *src)
182 {
183 dst->u[0] = (unsigned)src->d[0];
184 dst->u[1] = (unsigned)src->d[1];
185 dst->u[2] = (unsigned)src->d[2];
186 dst->u[3] = (unsigned)src->d[3];
187 }
188 static void
micro_dabs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)189 micro_dabs(union tgsi_double_channel *dst,
190 const union tgsi_double_channel *src)
191 {
192 dst->d[0] = src->d[0] >= 0.0 ? src->d[0] : -src->d[0];
193 dst->d[1] = src->d[1] >= 0.0 ? src->d[1] : -src->d[1];
194 dst->d[2] = src->d[2] >= 0.0 ? src->d[2] : -src->d[2];
195 dst->d[3] = src->d[3] >= 0.0 ? src->d[3] : -src->d[3];
196 }
197
198 static void
micro_dadd(union tgsi_double_channel * dst,const union tgsi_double_channel * src)199 micro_dadd(union tgsi_double_channel *dst,
200 const union tgsi_double_channel *src)
201 {
202 dst->d[0] = src[0].d[0] + src[1].d[0];
203 dst->d[1] = src[0].d[1] + src[1].d[1];
204 dst->d[2] = src[0].d[2] + src[1].d[2];
205 dst->d[3] = src[0].d[3] + src[1].d[3];
206 }
207
208 static void
micro_ddiv(union tgsi_double_channel * dst,const union tgsi_double_channel * src)209 micro_ddiv(union tgsi_double_channel *dst,
210 const union tgsi_double_channel *src)
211 {
212 dst->d[0] = src[0].d[0] / src[1].d[0];
213 dst->d[1] = src[0].d[1] / src[1].d[1];
214 dst->d[2] = src[0].d[2] / src[1].d[2];
215 dst->d[3] = src[0].d[3] / src[1].d[3];
216 }
217
218 static void
micro_ddx(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)219 micro_ddx(union tgsi_exec_channel *dst,
220 const union tgsi_exec_channel *src)
221 {
222 dst->f[0] =
223 dst->f[1] =
224 dst->f[2] =
225 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
226 }
227
228 static void
micro_ddx_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)229 micro_ddx_fine(union tgsi_exec_channel *dst,
230 const union tgsi_exec_channel *src)
231 {
232 dst->f[0] =
233 dst->f[1] = src->f[TILE_TOP_RIGHT] - src->f[TILE_TOP_LEFT];
234 dst->f[2] =
235 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_BOTTOM_LEFT];
236 }
237
238
239 static void
micro_ddy(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)240 micro_ddy(union tgsi_exec_channel *dst,
241 const union tgsi_exec_channel *src)
242 {
243 dst->f[0] =
244 dst->f[1] =
245 dst->f[2] =
246 dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
247 }
248
249 static void
micro_ddy_fine(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)250 micro_ddy_fine(union tgsi_exec_channel *dst,
251 const union tgsi_exec_channel *src)
252 {
253 dst->f[0] =
254 dst->f[2] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
255 dst->f[1] =
256 dst->f[3] = src->f[TILE_BOTTOM_RIGHT] - src->f[TILE_TOP_RIGHT];
257 }
258
259 static void
micro_dmul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)260 micro_dmul(union tgsi_double_channel *dst,
261 const union tgsi_double_channel *src)
262 {
263 dst->d[0] = src[0].d[0] * src[1].d[0];
264 dst->d[1] = src[0].d[1] * src[1].d[1];
265 dst->d[2] = src[0].d[2] * src[1].d[2];
266 dst->d[3] = src[0].d[3] * src[1].d[3];
267 }
268
269 static void
micro_dmax(union tgsi_double_channel * dst,const union tgsi_double_channel * src)270 micro_dmax(union tgsi_double_channel *dst,
271 const union tgsi_double_channel *src)
272 {
273 dst->d[0] = fmax(src[0].d[0], src[1].d[0]);
274 dst->d[1] = fmax(src[0].d[1], src[1].d[1]);
275 dst->d[2] = fmax(src[0].d[2], src[1].d[2]);
276 dst->d[3] = fmax(src[0].d[3], src[1].d[3]);
277 }
278
279 static void
micro_dmin(union tgsi_double_channel * dst,const union tgsi_double_channel * src)280 micro_dmin(union tgsi_double_channel *dst,
281 const union tgsi_double_channel *src)
282 {
283 dst->d[0] = fmin(src[0].d[0], src[1].d[0]);
284 dst->d[1] = fmin(src[0].d[1], src[1].d[1]);
285 dst->d[2] = fmin(src[0].d[2], src[1].d[2]);
286 dst->d[3] = fmin(src[0].d[3], src[1].d[3]);
287 }
288
289 static void
micro_dneg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)290 micro_dneg(union tgsi_double_channel *dst,
291 const union tgsi_double_channel *src)
292 {
293 dst->d[0] = -src->d[0];
294 dst->d[1] = -src->d[1];
295 dst->d[2] = -src->d[2];
296 dst->d[3] = -src->d[3];
297 }
298
299 static void
micro_dslt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)300 micro_dslt(union tgsi_double_channel *dst,
301 const union tgsi_double_channel *src)
302 {
303 dst->u[0][0] = src[0].d[0] < src[1].d[0] ? ~0U : 0U;
304 dst->u[1][0] = src[0].d[1] < src[1].d[1] ? ~0U : 0U;
305 dst->u[2][0] = src[0].d[2] < src[1].d[2] ? ~0U : 0U;
306 dst->u[3][0] = src[0].d[3] < src[1].d[3] ? ~0U : 0U;
307 }
308
309 static void
micro_dsne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)310 micro_dsne(union tgsi_double_channel *dst,
311 const union tgsi_double_channel *src)
312 {
313 dst->u[0][0] = src[0].d[0] != src[1].d[0] ? ~0U : 0U;
314 dst->u[1][0] = src[0].d[1] != src[1].d[1] ? ~0U : 0U;
315 dst->u[2][0] = src[0].d[2] != src[1].d[2] ? ~0U : 0U;
316 dst->u[3][0] = src[0].d[3] != src[1].d[3] ? ~0U : 0U;
317 }
318
319 static void
micro_dsge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)320 micro_dsge(union tgsi_double_channel *dst,
321 const union tgsi_double_channel *src)
322 {
323 dst->u[0][0] = src[0].d[0] >= src[1].d[0] ? ~0U : 0U;
324 dst->u[1][0] = src[0].d[1] >= src[1].d[1] ? ~0U : 0U;
325 dst->u[2][0] = src[0].d[2] >= src[1].d[2] ? ~0U : 0U;
326 dst->u[3][0] = src[0].d[3] >= src[1].d[3] ? ~0U : 0U;
327 }
328
329 static void
micro_dseq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)330 micro_dseq(union tgsi_double_channel *dst,
331 const union tgsi_double_channel *src)
332 {
333 dst->u[0][0] = src[0].d[0] == src[1].d[0] ? ~0U : 0U;
334 dst->u[1][0] = src[0].d[1] == src[1].d[1] ? ~0U : 0U;
335 dst->u[2][0] = src[0].d[2] == src[1].d[2] ? ~0U : 0U;
336 dst->u[3][0] = src[0].d[3] == src[1].d[3] ? ~0U : 0U;
337 }
338
339 static void
micro_drcp(union tgsi_double_channel * dst,const union tgsi_double_channel * src)340 micro_drcp(union tgsi_double_channel *dst,
341 const union tgsi_double_channel *src)
342 {
343 dst->d[0] = 1.0 / src->d[0];
344 dst->d[1] = 1.0 / src->d[1];
345 dst->d[2] = 1.0 / src->d[2];
346 dst->d[3] = 1.0 / src->d[3];
347 }
348
349 static void
micro_dsqrt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)350 micro_dsqrt(union tgsi_double_channel *dst,
351 const union tgsi_double_channel *src)
352 {
353 dst->d[0] = sqrt(src->d[0]);
354 dst->d[1] = sqrt(src->d[1]);
355 dst->d[2] = sqrt(src->d[2]);
356 dst->d[3] = sqrt(src->d[3]);
357 }
358
359 static void
micro_drsq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)360 micro_drsq(union tgsi_double_channel *dst,
361 const union tgsi_double_channel *src)
362 {
363 dst->d[0] = 1.0 / sqrt(src->d[0]);
364 dst->d[1] = 1.0 / sqrt(src->d[1]);
365 dst->d[2] = 1.0 / sqrt(src->d[2]);
366 dst->d[3] = 1.0 / sqrt(src->d[3]);
367 }
368
369 static void
micro_dmad(union tgsi_double_channel * dst,const union tgsi_double_channel * src)370 micro_dmad(union tgsi_double_channel *dst,
371 const union tgsi_double_channel *src)
372 {
373 dst->d[0] = src[0].d[0] * src[1].d[0] + src[2].d[0];
374 dst->d[1] = src[0].d[1] * src[1].d[1] + src[2].d[1];
375 dst->d[2] = src[0].d[2] * src[1].d[2] + src[2].d[2];
376 dst->d[3] = src[0].d[3] * src[1].d[3] + src[2].d[3];
377 }
378
379 static void
micro_dfrac(union tgsi_double_channel * dst,const union tgsi_double_channel * src)380 micro_dfrac(union tgsi_double_channel *dst,
381 const union tgsi_double_channel *src)
382 {
383 dst->d[0] = src->d[0] - floor(src->d[0]);
384 dst->d[1] = src->d[1] - floor(src->d[1]);
385 dst->d[2] = src->d[2] - floor(src->d[2]);
386 dst->d[3] = src->d[3] - floor(src->d[3]);
387 }
388
389 static void
micro_dflr(union tgsi_double_channel * dst,const union tgsi_double_channel * src)390 micro_dflr(union tgsi_double_channel *dst,
391 const union tgsi_double_channel *src)
392 {
393 dst->d[0] = floor(src->d[0]);
394 dst->d[1] = floor(src->d[1]);
395 dst->d[2] = floor(src->d[2]);
396 dst->d[3] = floor(src->d[3]);
397 }
398
399 static void
micro_dldexp(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)400 micro_dldexp(union tgsi_double_channel *dst,
401 const union tgsi_double_channel *src0,
402 union tgsi_exec_channel *src1)
403 {
404 dst->d[0] = ldexp(src0->d[0], src1->i[0]);
405 dst->d[1] = ldexp(src0->d[1], src1->i[1]);
406 dst->d[2] = ldexp(src0->d[2], src1->i[2]);
407 dst->d[3] = ldexp(src0->d[3], src1->i[3]);
408 }
409
410 static void
micro_exp2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)411 micro_exp2(union tgsi_exec_channel *dst,
412 const union tgsi_exec_channel *src)
413 {
414 #if MESA_DEBUG
415 /* Inf is okay for this instruction, so clamp it to silence assertions. */
416 unsigned i;
417 union tgsi_exec_channel clamped;
418
419 for (i = 0; i < 4; i++) {
420 if (src->f[i] > 127.99999f) {
421 clamped.f[i] = 127.99999f;
422 } else if (src->f[i] < -126.99999f) {
423 clamped.f[i] = -126.99999f;
424 } else {
425 clamped.f[i] = src->f[i];
426 }
427 }
428 src = &clamped;
429 #endif /* MESA_DEBUG */
430
431 dst->f[0] = powf(2.0f, src->f[0]);
432 dst->f[1] = powf(2.0f, src->f[1]);
433 dst->f[2] = powf(2.0f, src->f[2]);
434 dst->f[3] = powf(2.0f, src->f[3]);
435 }
436
437 static void
micro_f2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)438 micro_f2d(union tgsi_double_channel *dst,
439 const union tgsi_exec_channel *src)
440 {
441 dst->d[0] = (double)src->f[0];
442 dst->d[1] = (double)src->f[1];
443 dst->d[2] = (double)src->f[2];
444 dst->d[3] = (double)src->f[3];
445 }
446
447 static void
micro_flr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)448 micro_flr(union tgsi_exec_channel *dst,
449 const union tgsi_exec_channel *src)
450 {
451 dst->f[0] = floorf(src->f[0]);
452 dst->f[1] = floorf(src->f[1]);
453 dst->f[2] = floorf(src->f[2]);
454 dst->f[3] = floorf(src->f[3]);
455 }
456
457 static void
micro_frc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)458 micro_frc(union tgsi_exec_channel *dst,
459 const union tgsi_exec_channel *src)
460 {
461 dst->f[0] = src->f[0] - floorf(src->f[0]);
462 dst->f[1] = src->f[1] - floorf(src->f[1]);
463 dst->f[2] = src->f[2] - floorf(src->f[2]);
464 dst->f[3] = src->f[3] - floorf(src->f[3]);
465 }
466
467 static void
micro_i2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)468 micro_i2d(union tgsi_double_channel *dst,
469 const union tgsi_exec_channel *src)
470 {
471 dst->d[0] = (double)src->i[0];
472 dst->d[1] = (double)src->i[1];
473 dst->d[2] = (double)src->i[2];
474 dst->d[3] = (double)src->i[3];
475 }
476
477 static void
micro_iabs(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)478 micro_iabs(union tgsi_exec_channel *dst,
479 const union tgsi_exec_channel *src)
480 {
481 dst->i[0] = src->i[0] >= 0 ? src->i[0] : -src->i[0];
482 dst->i[1] = src->i[1] >= 0 ? src->i[1] : -src->i[1];
483 dst->i[2] = src->i[2] >= 0 ? src->i[2] : -src->i[2];
484 dst->i[3] = src->i[3] >= 0 ? src->i[3] : -src->i[3];
485 }
486
487 static void
micro_ineg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)488 micro_ineg(union tgsi_exec_channel *dst,
489 const union tgsi_exec_channel *src)
490 {
491 dst->i[0] = -src->i[0];
492 dst->i[1] = -src->i[1];
493 dst->i[2] = -src->i[2];
494 dst->i[3] = -src->i[3];
495 }
496
497 static void
micro_lg2(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)498 micro_lg2(union tgsi_exec_channel *dst,
499 const union tgsi_exec_channel *src)
500 {
501 dst->f[0] = logf(src->f[0]) * 1.442695f;
502 dst->f[1] = logf(src->f[1]) * 1.442695f;
503 dst->f[2] = logf(src->f[2]) * 1.442695f;
504 dst->f[3] = logf(src->f[3]) * 1.442695f;
505 }
506
507 static void
micro_lrp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)508 micro_lrp(union tgsi_exec_channel *dst,
509 const union tgsi_exec_channel *src0,
510 const union tgsi_exec_channel *src1,
511 const union tgsi_exec_channel *src2)
512 {
513 dst->f[0] = src0->f[0] * (src1->f[0] - src2->f[0]) + src2->f[0];
514 dst->f[1] = src0->f[1] * (src1->f[1] - src2->f[1]) + src2->f[1];
515 dst->f[2] = src0->f[2] * (src1->f[2] - src2->f[2]) + src2->f[2];
516 dst->f[3] = src0->f[3] * (src1->f[3] - src2->f[3]) + src2->f[3];
517 }
518
519 static void
micro_mad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)520 micro_mad(union tgsi_exec_channel *dst,
521 const union tgsi_exec_channel *src0,
522 const union tgsi_exec_channel *src1,
523 const union tgsi_exec_channel *src2)
524 {
525 dst->f[0] = src0->f[0] * src1->f[0] + src2->f[0];
526 dst->f[1] = src0->f[1] * src1->f[1] + src2->f[1];
527 dst->f[2] = src0->f[2] * src1->f[2] + src2->f[2];
528 dst->f[3] = src0->f[3] * src1->f[3] + src2->f[3];
529 }
530
531 static void
micro_mov(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)532 micro_mov(union tgsi_exec_channel *dst,
533 const union tgsi_exec_channel *src)
534 {
535 dst->u[0] = src->u[0];
536 dst->u[1] = src->u[1];
537 dst->u[2] = src->u[2];
538 dst->u[3] = src->u[3];
539 }
540
541 static void
micro_rcp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)542 micro_rcp(union tgsi_exec_channel *dst,
543 const union tgsi_exec_channel *src)
544 {
545 #if 0 /* for debugging */
546 assert(src->f[0] != 0.0f);
547 assert(src->f[1] != 0.0f);
548 assert(src->f[2] != 0.0f);
549 assert(src->f[3] != 0.0f);
550 #endif
551 dst->f[0] = 1.0f / src->f[0];
552 dst->f[1] = 1.0f / src->f[1];
553 dst->f[2] = 1.0f / src->f[2];
554 dst->f[3] = 1.0f / src->f[3];
555 }
556
557 static void
micro_rnd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)558 micro_rnd(union tgsi_exec_channel *dst,
559 const union tgsi_exec_channel *src)
560 {
561 dst->f[0] = _mesa_roundevenf(src->f[0]);
562 dst->f[1] = _mesa_roundevenf(src->f[1]);
563 dst->f[2] = _mesa_roundevenf(src->f[2]);
564 dst->f[3] = _mesa_roundevenf(src->f[3]);
565 }
566
567 static void
micro_rsq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)568 micro_rsq(union tgsi_exec_channel *dst,
569 const union tgsi_exec_channel *src)
570 {
571 #if 0 /* for debugging */
572 assert(src->f[0] != 0.0f);
573 assert(src->f[1] != 0.0f);
574 assert(src->f[2] != 0.0f);
575 assert(src->f[3] != 0.0f);
576 #endif
577 dst->f[0] = 1.0f / sqrtf(src->f[0]);
578 dst->f[1] = 1.0f / sqrtf(src->f[1]);
579 dst->f[2] = 1.0f / sqrtf(src->f[2]);
580 dst->f[3] = 1.0f / sqrtf(src->f[3]);
581 }
582
583 static void
micro_sqrt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)584 micro_sqrt(union tgsi_exec_channel *dst,
585 const union tgsi_exec_channel *src)
586 {
587 dst->f[0] = sqrtf(src->f[0]);
588 dst->f[1] = sqrtf(src->f[1]);
589 dst->f[2] = sqrtf(src->f[2]);
590 dst->f[3] = sqrtf(src->f[3]);
591 }
592
593 static void
micro_seq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)594 micro_seq(union tgsi_exec_channel *dst,
595 const union tgsi_exec_channel *src0,
596 const union tgsi_exec_channel *src1)
597 {
598 dst->f[0] = src0->f[0] == src1->f[0] ? 1.0f : 0.0f;
599 dst->f[1] = src0->f[1] == src1->f[1] ? 1.0f : 0.0f;
600 dst->f[2] = src0->f[2] == src1->f[2] ? 1.0f : 0.0f;
601 dst->f[3] = src0->f[3] == src1->f[3] ? 1.0f : 0.0f;
602 }
603
604 static void
micro_sge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)605 micro_sge(union tgsi_exec_channel *dst,
606 const union tgsi_exec_channel *src0,
607 const union tgsi_exec_channel *src1)
608 {
609 dst->f[0] = src0->f[0] >= src1->f[0] ? 1.0f : 0.0f;
610 dst->f[1] = src0->f[1] >= src1->f[1] ? 1.0f : 0.0f;
611 dst->f[2] = src0->f[2] >= src1->f[2] ? 1.0f : 0.0f;
612 dst->f[3] = src0->f[3] >= src1->f[3] ? 1.0f : 0.0f;
613 }
614
615 static void
micro_sgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)616 micro_sgn(union tgsi_exec_channel *dst,
617 const union tgsi_exec_channel *src)
618 {
619 dst->f[0] = src->f[0] < 0.0f ? -1.0f : src->f[0] > 0.0f ? 1.0f : 0.0f;
620 dst->f[1] = src->f[1] < 0.0f ? -1.0f : src->f[1] > 0.0f ? 1.0f : 0.0f;
621 dst->f[2] = src->f[2] < 0.0f ? -1.0f : src->f[2] > 0.0f ? 1.0f : 0.0f;
622 dst->f[3] = src->f[3] < 0.0f ? -1.0f : src->f[3] > 0.0f ? 1.0f : 0.0f;
623 }
624
625 static void
micro_isgn(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)626 micro_isgn(union tgsi_exec_channel *dst,
627 const union tgsi_exec_channel *src)
628 {
629 dst->i[0] = src->i[0] < 0 ? -1 : src->i[0] > 0 ? 1 : 0;
630 dst->i[1] = src->i[1] < 0 ? -1 : src->i[1] > 0 ? 1 : 0;
631 dst->i[2] = src->i[2] < 0 ? -1 : src->i[2] > 0 ? 1 : 0;
632 dst->i[3] = src->i[3] < 0 ? -1 : src->i[3] > 0 ? 1 : 0;
633 }
634
635 static void
micro_sgt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)636 micro_sgt(union tgsi_exec_channel *dst,
637 const union tgsi_exec_channel *src0,
638 const union tgsi_exec_channel *src1)
639 {
640 dst->f[0] = src0->f[0] > src1->f[0] ? 1.0f : 0.0f;
641 dst->f[1] = src0->f[1] > src1->f[1] ? 1.0f : 0.0f;
642 dst->f[2] = src0->f[2] > src1->f[2] ? 1.0f : 0.0f;
643 dst->f[3] = src0->f[3] > src1->f[3] ? 1.0f : 0.0f;
644 }
645
646 static void
micro_sin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)647 micro_sin(union tgsi_exec_channel *dst,
648 const union tgsi_exec_channel *src)
649 {
650 dst->f[0] = sinf(src->f[0]);
651 dst->f[1] = sinf(src->f[1]);
652 dst->f[2] = sinf(src->f[2]);
653 dst->f[3] = sinf(src->f[3]);
654 }
655
656 static void
micro_sle(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)657 micro_sle(union tgsi_exec_channel *dst,
658 const union tgsi_exec_channel *src0,
659 const union tgsi_exec_channel *src1)
660 {
661 dst->f[0] = src0->f[0] <= src1->f[0] ? 1.0f : 0.0f;
662 dst->f[1] = src0->f[1] <= src1->f[1] ? 1.0f : 0.0f;
663 dst->f[2] = src0->f[2] <= src1->f[2] ? 1.0f : 0.0f;
664 dst->f[3] = src0->f[3] <= src1->f[3] ? 1.0f : 0.0f;
665 }
666
667 static void
micro_slt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)668 micro_slt(union tgsi_exec_channel *dst,
669 const union tgsi_exec_channel *src0,
670 const union tgsi_exec_channel *src1)
671 {
672 dst->f[0] = src0->f[0] < src1->f[0] ? 1.0f : 0.0f;
673 dst->f[1] = src0->f[1] < src1->f[1] ? 1.0f : 0.0f;
674 dst->f[2] = src0->f[2] < src1->f[2] ? 1.0f : 0.0f;
675 dst->f[3] = src0->f[3] < src1->f[3] ? 1.0f : 0.0f;
676 }
677
678 static void
micro_sne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)679 micro_sne(union tgsi_exec_channel *dst,
680 const union tgsi_exec_channel *src0,
681 const union tgsi_exec_channel *src1)
682 {
683 dst->f[0] = src0->f[0] != src1->f[0] ? 1.0f : 0.0f;
684 dst->f[1] = src0->f[1] != src1->f[1] ? 1.0f : 0.0f;
685 dst->f[2] = src0->f[2] != src1->f[2] ? 1.0f : 0.0f;
686 dst->f[3] = src0->f[3] != src1->f[3] ? 1.0f : 0.0f;
687 }
688
689 static void
micro_trunc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)690 micro_trunc(union tgsi_exec_channel *dst,
691 const union tgsi_exec_channel *src)
692 {
693 dst->f[0] = truncf(src->f[0]);
694 dst->f[1] = truncf(src->f[1]);
695 dst->f[2] = truncf(src->f[2]);
696 dst->f[3] = truncf(src->f[3]);
697 }
698
699 static void
micro_u2d(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)700 micro_u2d(union tgsi_double_channel *dst,
701 const union tgsi_exec_channel *src)
702 {
703 dst->d[0] = (double)src->u[0];
704 dst->d[1] = (double)src->u[1];
705 dst->d[2] = (double)src->u[2];
706 dst->d[3] = (double)src->u[3];
707 }
708
709 static void
micro_i64abs(union tgsi_double_channel * dst,const union tgsi_double_channel * src)710 micro_i64abs(union tgsi_double_channel *dst,
711 const union tgsi_double_channel *src)
712 {
713 dst->i64[0] = src->i64[0] >= 0.0 ? src->i64[0] : -src->i64[0];
714 dst->i64[1] = src->i64[1] >= 0.0 ? src->i64[1] : -src->i64[1];
715 dst->i64[2] = src->i64[2] >= 0.0 ? src->i64[2] : -src->i64[2];
716 dst->i64[3] = src->i64[3] >= 0.0 ? src->i64[3] : -src->i64[3];
717 }
718
719 static void
micro_i64sgn(union tgsi_double_channel * dst,const union tgsi_double_channel * src)720 micro_i64sgn(union tgsi_double_channel *dst,
721 const union tgsi_double_channel *src)
722 {
723 dst->i64[0] = src->i64[0] < 0 ? -1 : src->i64[0] > 0 ? 1 : 0;
724 dst->i64[1] = src->i64[1] < 0 ? -1 : src->i64[1] > 0 ? 1 : 0;
725 dst->i64[2] = src->i64[2] < 0 ? -1 : src->i64[2] > 0 ? 1 : 0;
726 dst->i64[3] = src->i64[3] < 0 ? -1 : src->i64[3] > 0 ? 1 : 0;
727 }
728
729 static void
micro_i64neg(union tgsi_double_channel * dst,const union tgsi_double_channel * src)730 micro_i64neg(union tgsi_double_channel *dst,
731 const union tgsi_double_channel *src)
732 {
733 dst->i64[0] = -src->i64[0];
734 dst->i64[1] = -src->i64[1];
735 dst->i64[2] = -src->i64[2];
736 dst->i64[3] = -src->i64[3];
737 }
738
739 static void
micro_u64seq(union tgsi_double_channel * dst,const union tgsi_double_channel * src)740 micro_u64seq(union tgsi_double_channel *dst,
741 const union tgsi_double_channel *src)
742 {
743 dst->u[0][0] = src[0].u64[0] == src[1].u64[0] ? ~0U : 0U;
744 dst->u[1][0] = src[0].u64[1] == src[1].u64[1] ? ~0U : 0U;
745 dst->u[2][0] = src[0].u64[2] == src[1].u64[2] ? ~0U : 0U;
746 dst->u[3][0] = src[0].u64[3] == src[1].u64[3] ? ~0U : 0U;
747 }
748
749 static void
micro_u64sne(union tgsi_double_channel * dst,const union tgsi_double_channel * src)750 micro_u64sne(union tgsi_double_channel *dst,
751 const union tgsi_double_channel *src)
752 {
753 dst->u[0][0] = src[0].u64[0] != src[1].u64[0] ? ~0U : 0U;
754 dst->u[1][0] = src[0].u64[1] != src[1].u64[1] ? ~0U : 0U;
755 dst->u[2][0] = src[0].u64[2] != src[1].u64[2] ? ~0U : 0U;
756 dst->u[3][0] = src[0].u64[3] != src[1].u64[3] ? ~0U : 0U;
757 }
758
759 static void
micro_i64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)760 micro_i64slt(union tgsi_double_channel *dst,
761 const union tgsi_double_channel *src)
762 {
763 dst->u[0][0] = src[0].i64[0] < src[1].i64[0] ? ~0U : 0U;
764 dst->u[1][0] = src[0].i64[1] < src[1].i64[1] ? ~0U : 0U;
765 dst->u[2][0] = src[0].i64[2] < src[1].i64[2] ? ~0U : 0U;
766 dst->u[3][0] = src[0].i64[3] < src[1].i64[3] ? ~0U : 0U;
767 }
768
769 static void
micro_u64slt(union tgsi_double_channel * dst,const union tgsi_double_channel * src)770 micro_u64slt(union tgsi_double_channel *dst,
771 const union tgsi_double_channel *src)
772 {
773 dst->u[0][0] = src[0].u64[0] < src[1].u64[0] ? ~0U : 0U;
774 dst->u[1][0] = src[0].u64[1] < src[1].u64[1] ? ~0U : 0U;
775 dst->u[2][0] = src[0].u64[2] < src[1].u64[2] ? ~0U : 0U;
776 dst->u[3][0] = src[0].u64[3] < src[1].u64[3] ? ~0U : 0U;
777 }
778
779 static void
micro_i64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)780 micro_i64sge(union tgsi_double_channel *dst,
781 const union tgsi_double_channel *src)
782 {
783 dst->u[0][0] = src[0].i64[0] >= src[1].i64[0] ? ~0U : 0U;
784 dst->u[1][0] = src[0].i64[1] >= src[1].i64[1] ? ~0U : 0U;
785 dst->u[2][0] = src[0].i64[2] >= src[1].i64[2] ? ~0U : 0U;
786 dst->u[3][0] = src[0].i64[3] >= src[1].i64[3] ? ~0U : 0U;
787 }
788
789 static void
micro_u64sge(union tgsi_double_channel * dst,const union tgsi_double_channel * src)790 micro_u64sge(union tgsi_double_channel *dst,
791 const union tgsi_double_channel *src)
792 {
793 dst->u[0][0] = src[0].u64[0] >= src[1].u64[0] ? ~0U : 0U;
794 dst->u[1][0] = src[0].u64[1] >= src[1].u64[1] ? ~0U : 0U;
795 dst->u[2][0] = src[0].u64[2] >= src[1].u64[2] ? ~0U : 0U;
796 dst->u[3][0] = src[0].u64[3] >= src[1].u64[3] ? ~0U : 0U;
797 }
798
799 static void
micro_u64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)800 micro_u64max(union tgsi_double_channel *dst,
801 const union tgsi_double_channel *src)
802 {
803 dst->u64[0] = src[0].u64[0] > src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
804 dst->u64[1] = src[0].u64[1] > src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
805 dst->u64[2] = src[0].u64[2] > src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
806 dst->u64[3] = src[0].u64[3] > src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
807 }
808
809 static void
micro_i64max(union tgsi_double_channel * dst,const union tgsi_double_channel * src)810 micro_i64max(union tgsi_double_channel *dst,
811 const union tgsi_double_channel *src)
812 {
813 dst->i64[0] = src[0].i64[0] > src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
814 dst->i64[1] = src[0].i64[1] > src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
815 dst->i64[2] = src[0].i64[2] > src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
816 dst->i64[3] = src[0].i64[3] > src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
817 }
818
819 static void
micro_u64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)820 micro_u64min(union tgsi_double_channel *dst,
821 const union tgsi_double_channel *src)
822 {
823 dst->u64[0] = src[0].u64[0] < src[1].u64[0] ? src[0].u64[0] : src[1].u64[0];
824 dst->u64[1] = src[0].u64[1] < src[1].u64[1] ? src[0].u64[1] : src[1].u64[1];
825 dst->u64[2] = src[0].u64[2] < src[1].u64[2] ? src[0].u64[2] : src[1].u64[2];
826 dst->u64[3] = src[0].u64[3] < src[1].u64[3] ? src[0].u64[3] : src[1].u64[3];
827 }
828
829 static void
micro_i64min(union tgsi_double_channel * dst,const union tgsi_double_channel * src)830 micro_i64min(union tgsi_double_channel *dst,
831 const union tgsi_double_channel *src)
832 {
833 dst->i64[0] = src[0].i64[0] < src[1].i64[0] ? src[0].i64[0] : src[1].i64[0];
834 dst->i64[1] = src[0].i64[1] < src[1].i64[1] ? src[0].i64[1] : src[1].i64[1];
835 dst->i64[2] = src[0].i64[2] < src[1].i64[2] ? src[0].i64[2] : src[1].i64[2];
836 dst->i64[3] = src[0].i64[3] < src[1].i64[3] ? src[0].i64[3] : src[1].i64[3];
837 }
838
839 static void
micro_u64add(union tgsi_double_channel * dst,const union tgsi_double_channel * src)840 micro_u64add(union tgsi_double_channel *dst,
841 const union tgsi_double_channel *src)
842 {
843 dst->u64[0] = src[0].u64[0] + src[1].u64[0];
844 dst->u64[1] = src[0].u64[1] + src[1].u64[1];
845 dst->u64[2] = src[0].u64[2] + src[1].u64[2];
846 dst->u64[3] = src[0].u64[3] + src[1].u64[3];
847 }
848
849 static void
micro_u64mul(union tgsi_double_channel * dst,const union tgsi_double_channel * src)850 micro_u64mul(union tgsi_double_channel *dst,
851 const union tgsi_double_channel *src)
852 {
853 dst->u64[0] = src[0].u64[0] * src[1].u64[0];
854 dst->u64[1] = src[0].u64[1] * src[1].u64[1];
855 dst->u64[2] = src[0].u64[2] * src[1].u64[2];
856 dst->u64[3] = src[0].u64[3] * src[1].u64[3];
857 }
858
859 static void
micro_u64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)860 micro_u64div(union tgsi_double_channel *dst,
861 const union tgsi_double_channel *src)
862 {
863 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] / src[1].u64[0] : ~0ull;
864 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] / src[1].u64[1] : ~0ull;
865 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] / src[1].u64[2] : ~0ull;
866 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] / src[1].u64[3] : ~0ull;
867 }
868
869 static void
micro_i64div(union tgsi_double_channel * dst,const union tgsi_double_channel * src)870 micro_i64div(union tgsi_double_channel *dst,
871 const union tgsi_double_channel *src)
872 {
873 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] / src[1].i64[0] : 0;
874 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] / src[1].i64[1] : 0;
875 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] / src[1].i64[2] : 0;
876 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] / src[1].i64[3] : 0;
877 }
878
879 static void
micro_u64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)880 micro_u64mod(union tgsi_double_channel *dst,
881 const union tgsi_double_channel *src)
882 {
883 dst->u64[0] = src[1].u64[0] ? src[0].u64[0] % src[1].u64[0] : ~0ull;
884 dst->u64[1] = src[1].u64[1] ? src[0].u64[1] % src[1].u64[1] : ~0ull;
885 dst->u64[2] = src[1].u64[2] ? src[0].u64[2] % src[1].u64[2] : ~0ull;
886 dst->u64[3] = src[1].u64[3] ? src[0].u64[3] % src[1].u64[3] : ~0ull;
887 }
888
889 static void
micro_i64mod(union tgsi_double_channel * dst,const union tgsi_double_channel * src)890 micro_i64mod(union tgsi_double_channel *dst,
891 const union tgsi_double_channel *src)
892 {
893 dst->i64[0] = src[1].i64[0] ? src[0].i64[0] % src[1].i64[0] : ~0ll;
894 dst->i64[1] = src[1].i64[1] ? src[0].i64[1] % src[1].i64[1] : ~0ll;
895 dst->i64[2] = src[1].i64[2] ? src[0].i64[2] % src[1].i64[2] : ~0ll;
896 dst->i64[3] = src[1].i64[3] ? src[0].i64[3] % src[1].i64[3] : ~0ll;
897 }
898
899 static void
micro_u64shl(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)900 micro_u64shl(union tgsi_double_channel *dst,
901 const union tgsi_double_channel *src0,
902 union tgsi_exec_channel *src1)
903 {
904 unsigned masked_count;
905 masked_count = src1->u[0] & 0x3f;
906 dst->u64[0] = src0->u64[0] << masked_count;
907 masked_count = src1->u[1] & 0x3f;
908 dst->u64[1] = src0->u64[1] << masked_count;
909 masked_count = src1->u[2] & 0x3f;
910 dst->u64[2] = src0->u64[2] << masked_count;
911 masked_count = src1->u[3] & 0x3f;
912 dst->u64[3] = src0->u64[3] << masked_count;
913 }
914
915 static void
micro_i64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)916 micro_i64shr(union tgsi_double_channel *dst,
917 const union tgsi_double_channel *src0,
918 union tgsi_exec_channel *src1)
919 {
920 unsigned masked_count;
921 masked_count = src1->u[0] & 0x3f;
922 dst->i64[0] = src0->i64[0] >> masked_count;
923 masked_count = src1->u[1] & 0x3f;
924 dst->i64[1] = src0->i64[1] >> masked_count;
925 masked_count = src1->u[2] & 0x3f;
926 dst->i64[2] = src0->i64[2] >> masked_count;
927 masked_count = src1->u[3] & 0x3f;
928 dst->i64[3] = src0->i64[3] >> masked_count;
929 }
930
931 static void
micro_u64shr(union tgsi_double_channel * dst,const union tgsi_double_channel * src0,union tgsi_exec_channel * src1)932 micro_u64shr(union tgsi_double_channel *dst,
933 const union tgsi_double_channel *src0,
934 union tgsi_exec_channel *src1)
935 {
936 unsigned masked_count;
937 masked_count = src1->u[0] & 0x3f;
938 dst->u64[0] = src0->u64[0] >> masked_count;
939 masked_count = src1->u[1] & 0x3f;
940 dst->u64[1] = src0->u64[1] >> masked_count;
941 masked_count = src1->u[2] & 0x3f;
942 dst->u64[2] = src0->u64[2] >> masked_count;
943 masked_count = src1->u[3] & 0x3f;
944 dst->u64[3] = src0->u64[3] >> masked_count;
945 }
946
/**
 * Data type tag passed to the operand fetch helpers; it selects how the
 * lanes of a channel are interpreted (e.g. float vs. integer negation).
 */
enum tgsi_exec_datatype {
   TGSI_EXEC_DATA_FLOAT = 0,  /* floating-point lanes */
   TGSI_EXEC_DATA_INT,        /* signed integer lanes */
   TGSI_EXEC_DATA_UINT,       /* unsigned integer lanes */
   TGSI_EXEC_DATA_DOUBLE,     /* double-precision lanes */
   TGSI_EXEC_DATA_INT64,      /* signed 64-bit integer lanes */
   TGSI_EXEC_DATA_UINT64,     /* unsigned 64-bit integer lanes */
};
955
/**
 * Recompute MACH->ExecMask as the intersection of the conditional, loop,
 * continue, switch and function-call masks.
 *
 * MACH is evaluated several times, so it must be free of side effects.
 * Both the argument and the whole expansion are parenthesized so the macro
 * behaves correctly for any pointer expression and inside larger expressions.
 */
#define UPDATE_EXEC_MASK(MACH) \
   ((MACH)->ExecMask = (MACH)->CondMask & (MACH)->LoopMask & \
                       (MACH)->ContMask & (MACH)->Switch.mask & \
                       (MACH)->FuncMask)
959
960
961 static const union tgsi_exec_channel ZeroVec =
962 { { 0.0, 0.0, 0.0, 0.0 } };
963
964 static const union tgsi_exec_channel OneVec = {
965 {1.0f, 1.0f, 1.0f, 1.0f}
966 };
967
968 static const union tgsi_exec_channel P128Vec = {
969 {128.0f, 128.0f, 128.0f, 128.0f}
970 };
971
972 static const union tgsi_exec_channel M128Vec = {
973 {-128.0f, -128.0f, -128.0f, -128.0f}
974 };
975
#if MESA_DEBUG
/**
 * Debug helper: print the four lanes of a channel as floats, prefixed
 * with the given label.
 */
static void
print_chan(const char *msg, const union tgsi_exec_channel *chan)
{
   const float *lanes = chan->f;

   debug_printf("%s = {%f, %f, %f, %f}\n",
                msg, lanes[0], lanes[1], lanes[2], lanes[3]);
}
#endif
984
985
#if MESA_DEBUG
/**
 * Debug helper: print temporary register 'index' of the machine, one
 * line per X/Y/Z/W channel with the four lanes shown as floats.
 */
static void
print_temp(const struct tgsi_exec_machine *mach, unsigned index)
{
   static const char chan_names[] = "XYZW";
   const struct tgsi_exec_vector *temp = &mach->Temps[index];

   debug_printf("Temp[%u] =\n", index);
   for (int c = 0; c < 4; c++) {
      const float *lanes = temp->xyzw[c].f;

      debug_printf(" %c: { %f, %f, %f, %f }\n",
                   chan_names[c],
                   lanes[0], lanes[1], lanes[2], lanes[3]);
   }
}
#endif
1003
1004
1005 void
tgsi_exec_set_constant_buffers(struct tgsi_exec_machine * mach,unsigned num_bufs,const struct tgsi_exec_consts_info * bufs)1006 tgsi_exec_set_constant_buffers(struct tgsi_exec_machine *mach,
1007 unsigned num_bufs,
1008 const struct tgsi_exec_consts_info *bufs)
1009 {
1010 unsigned i;
1011
1012 for (i = 0; i < num_bufs; i++) {
1013 mach->Consts[i] = bufs[i].ptr;
1014 mach->ConstsSize[i] = bufs[i].size;
1015 }
1016 }
1017
1018 /**
1019 * Initialize machine state by expanding tokens to full instructions,
1020 * allocating temporary storage, setting up constants, etc.
1021 * After this, we can call tgsi_exec_machine_run() many times.
1022 */
1023 void
tgsi_exec_machine_bind_shader(struct tgsi_exec_machine * mach,const struct tgsi_token * tokens,struct tgsi_sampler * sampler,struct tgsi_image * image,struct tgsi_buffer * buffer)1024 tgsi_exec_machine_bind_shader(
1025 struct tgsi_exec_machine *mach,
1026 const struct tgsi_token *tokens,
1027 struct tgsi_sampler *sampler,
1028 struct tgsi_image *image,
1029 struct tgsi_buffer *buffer)
1030 {
1031 unsigned k;
1032 struct tgsi_parse_context parse;
1033 struct tgsi_full_instruction *instructions;
1034 struct tgsi_full_declaration *declarations;
1035 unsigned maxInstructions = 10, numInstructions = 0;
1036 unsigned maxDeclarations = 10, numDeclarations = 0;
1037
1038 #if 0
1039 tgsi_dump(tokens, 0);
1040 #endif
1041
1042 mach->Tokens = tokens;
1043 mach->Sampler = sampler;
1044 mach->Image = image;
1045 mach->Buffer = buffer;
1046
1047 if (!tokens) {
1048 /* unbind and free all */
1049 FREE(mach->Declarations);
1050 mach->Declarations = NULL;
1051 mach->NumDeclarations = 0;
1052
1053 FREE(mach->Instructions);
1054 mach->Instructions = NULL;
1055 mach->NumInstructions = 0;
1056
1057 return;
1058 }
1059
1060 k = tgsi_parse_init (&parse, mach->Tokens);
1061 if (k != TGSI_PARSE_OK) {
1062 debug_printf( "Problem parsing!\n" );
1063 return;
1064 }
1065
1066 mach->ImmLimit = 0;
1067 mach->NumOutputs = 0;
1068
1069 for (k = 0; k < TGSI_SEMANTIC_COUNT; k++)
1070 mach->SysSemanticToIndex[k] = -1;
1071
1072 if (mach->ShaderType == PIPE_SHADER_GEOMETRY &&
1073 !mach->UsedGeometryShader) {
1074 struct tgsi_exec_vector *inputs;
1075 struct tgsi_exec_vector *outputs;
1076
1077 inputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1078 TGSI_MAX_PRIM_VERTICES * PIPE_MAX_SHADER_INPUTS,
1079 16);
1080
1081 if (!inputs)
1082 return;
1083
1084 outputs = align_malloc(sizeof(struct tgsi_exec_vector) *
1085 TGSI_MAX_TOTAL_VERTICES, 16);
1086
1087 if (!outputs) {
1088 align_free(inputs);
1089 return;
1090 }
1091
1092 align_free(mach->Inputs);
1093 align_free(mach->Outputs);
1094
1095 mach->Inputs = inputs;
1096 mach->Outputs = outputs;
1097 mach->UsedGeometryShader = true;
1098 }
1099
1100 declarations = (struct tgsi_full_declaration *)
1101 MALLOC( maxDeclarations * sizeof(struct tgsi_full_declaration) );
1102
1103 if (!declarations) {
1104 return;
1105 }
1106
1107 instructions = (struct tgsi_full_instruction *)
1108 MALLOC( maxInstructions * sizeof(struct tgsi_full_instruction) );
1109
1110 if (!instructions) {
1111 FREE( declarations );
1112 return;
1113 }
1114
1115 while( !tgsi_parse_end_of_tokens( &parse ) ) {
1116 unsigned i;
1117
1118 tgsi_parse_token( &parse );
1119 switch( parse.FullToken.Token.Type ) {
1120 case TGSI_TOKEN_TYPE_DECLARATION:
1121 /* save expanded declaration */
1122 if (numDeclarations == maxDeclarations) {
1123 declarations = REALLOC(declarations,
1124 maxDeclarations
1125 * sizeof(struct tgsi_full_declaration),
1126 (maxDeclarations + 10)
1127 * sizeof(struct tgsi_full_declaration));
1128 maxDeclarations += 10;
1129 }
1130 if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_OUTPUT)
1131 mach->NumOutputs = MAX2(mach->NumOutputs, parse.FullToken.FullDeclaration.Range.Last + 1);
1132 else if (parse.FullToken.FullDeclaration.Declaration.File == TGSI_FILE_SYSTEM_VALUE) {
1133 const struct tgsi_full_declaration *decl = &parse.FullToken.FullDeclaration;
1134 mach->SysSemanticToIndex[decl->Semantic.Name] = decl->Range.First;
1135 }
1136
1137 memcpy(declarations + numDeclarations,
1138 &parse.FullToken.FullDeclaration,
1139 sizeof(declarations[0]));
1140 numDeclarations++;
1141 break;
1142
1143 case TGSI_TOKEN_TYPE_IMMEDIATE:
1144 {
1145 unsigned size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
1146 assert( size <= 4 );
1147 if (mach->ImmLimit >= mach->ImmsReserved) {
1148 unsigned newReserved = mach->ImmsReserved ? 2 * mach->ImmsReserved : 128;
1149 float4 *imms = REALLOC(mach->Imms, mach->ImmsReserved, newReserved * sizeof(float4));
1150 if (imms) {
1151 mach->ImmsReserved = newReserved;
1152 mach->Imms = imms;
1153 } else {
1154 debug_printf("Unable to (re)allocate space for immidiate constants\n");
1155 break;
1156 }
1157 }
1158
1159 for( i = 0; i < size; i++ ) {
1160 mach->Imms[mach->ImmLimit][i] =
1161 parse.FullToken.FullImmediate.u[i].Float;
1162 }
1163 mach->ImmLimit += 1;
1164 }
1165 break;
1166
1167 case TGSI_TOKEN_TYPE_INSTRUCTION:
1168
1169 /* save expanded instruction */
1170 if (numInstructions == maxInstructions) {
1171 instructions = REALLOC(instructions,
1172 maxInstructions
1173 * sizeof(struct tgsi_full_instruction),
1174 (maxInstructions + 10)
1175 * sizeof(struct tgsi_full_instruction));
1176 maxInstructions += 10;
1177 }
1178
1179 memcpy(instructions + numInstructions,
1180 &parse.FullToken.FullInstruction,
1181 sizeof(instructions[0]));
1182
1183 numInstructions++;
1184 break;
1185
1186 case TGSI_TOKEN_TYPE_PROPERTY:
1187 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
1188 if (parse.FullToken.FullProperty.Property.PropertyName == TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES) {
1189 mach->MaxOutputVertices = parse.FullToken.FullProperty.u[0].Data;
1190 }
1191 }
1192 break;
1193
1194 default:
1195 assert( 0 );
1196 }
1197 }
1198 tgsi_parse_free (&parse);
1199
1200 FREE(mach->Declarations);
1201 mach->Declarations = declarations;
1202 mach->NumDeclarations = numDeclarations;
1203
1204 FREE(mach->Instructions);
1205 mach->Instructions = instructions;
1206 mach->NumInstructions = numInstructions;
1207 }
1208
1209
1210 struct tgsi_exec_machine *
tgsi_exec_machine_create(enum pipe_shader_type shader_type)1211 tgsi_exec_machine_create(enum pipe_shader_type shader_type)
1212 {
1213 struct tgsi_exec_machine *mach;
1214
1215 mach = align_malloc( sizeof *mach, 16 );
1216 if (!mach)
1217 goto fail;
1218
1219 memset(mach, 0, sizeof(*mach));
1220
1221 mach->ShaderType = shader_type;
1222
1223 if (shader_type != PIPE_SHADER_COMPUTE) {
1224 mach->Inputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_INPUTS, 16);
1225 mach->Outputs = align_malloc(sizeof(struct tgsi_exec_vector) * PIPE_MAX_SHADER_OUTPUTS, 16);
1226 if (!mach->Inputs || !mach->Outputs)
1227 goto fail;
1228 }
1229
1230 if (shader_type == PIPE_SHADER_FRAGMENT) {
1231 mach->InputSampleOffsetApply = align_malloc(sizeof(apply_sample_offset_func) * PIPE_MAX_SHADER_INPUTS, 16);
1232 if (!mach->InputSampleOffsetApply)
1233 goto fail;
1234 }
1235
1236 #if MESA_DEBUG
1237 /* silence warnings */
1238 (void) print_chan;
1239 (void) print_temp;
1240 #endif
1241
1242 return mach;
1243
1244 fail:
1245 if (mach) {
1246 align_free(mach->InputSampleOffsetApply);
1247 align_free(mach->Inputs);
1248 align_free(mach->Outputs);
1249 align_free(mach);
1250 }
1251 return NULL;
1252 }
1253
1254
1255 void
tgsi_exec_machine_destroy(struct tgsi_exec_machine * mach)1256 tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
1257 {
1258 if (mach) {
1259 FREE(mach->Instructions);
1260 FREE(mach->Declarations);
1261 FREE(mach->Imms);
1262
1263 align_free(mach->InputSampleOffsetApply);
1264 align_free(mach->Inputs);
1265 align_free(mach->Outputs);
1266
1267 align_free(mach);
1268 }
1269 }
1270
1271 static void
micro_add(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1272 micro_add(union tgsi_exec_channel *dst,
1273 const union tgsi_exec_channel *src0,
1274 const union tgsi_exec_channel *src1)
1275 {
1276 dst->f[0] = src0->f[0] + src1->f[0];
1277 dst->f[1] = src0->f[1] + src1->f[1];
1278 dst->f[2] = src0->f[2] + src1->f[2];
1279 dst->f[3] = src0->f[3] + src1->f[3];
1280 }
1281
1282 static void
micro_div(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1283 micro_div(
1284 union tgsi_exec_channel *dst,
1285 const union tgsi_exec_channel *src0,
1286 const union tgsi_exec_channel *src1 )
1287 {
1288 dst->f[0] = src0->f[0] / src1->f[0];
1289 dst->f[1] = src0->f[1] / src1->f[1];
1290 dst->f[2] = src0->f[2] / src1->f[2];
1291 dst->f[3] = src0->f[3] / src1->f[3];
1292 }
1293
1294 static void
micro_lt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)1295 micro_lt(
1296 union tgsi_exec_channel *dst,
1297 const union tgsi_exec_channel *src0,
1298 const union tgsi_exec_channel *src1,
1299 const union tgsi_exec_channel *src2,
1300 const union tgsi_exec_channel *src3 )
1301 {
1302 dst->f[0] = src0->f[0] < src1->f[0] ? src2->f[0] : src3->f[0];
1303 dst->f[1] = src0->f[1] < src1->f[1] ? src2->f[1] : src3->f[1];
1304 dst->f[2] = src0->f[2] < src1->f[2] ? src2->f[2] : src3->f[2];
1305 dst->f[3] = src0->f[3] < src1->f[3] ? src2->f[3] : src3->f[3];
1306 }
1307
1308 static void
micro_max(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1309 micro_max(union tgsi_exec_channel *dst,
1310 const union tgsi_exec_channel *src0,
1311 const union tgsi_exec_channel *src1)
1312 {
1313 dst->f[0] = fmaxf(src0->f[0], src1->f[0]);
1314 dst->f[1] = fmaxf(src0->f[1], src1->f[1]);
1315 dst->f[2] = fmaxf(src0->f[2], src1->f[2]);
1316 dst->f[3] = fmaxf(src0->f[3], src1->f[3]);
1317 }
1318
1319 static void
micro_min(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1320 micro_min(union tgsi_exec_channel *dst,
1321 const union tgsi_exec_channel *src0,
1322 const union tgsi_exec_channel *src1)
1323 {
1324 dst->f[0] = fminf(src0->f[0], src1->f[0]);
1325 dst->f[1] = fminf(src0->f[1], src1->f[1]);
1326 dst->f[2] = fminf(src0->f[2], src1->f[2]);
1327 dst->f[3] = fminf(src0->f[3], src1->f[3]);
1328 }
1329
1330 static void
micro_mul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1331 micro_mul(union tgsi_exec_channel *dst,
1332 const union tgsi_exec_channel *src0,
1333 const union tgsi_exec_channel *src1)
1334 {
1335 dst->f[0] = src0->f[0] * src1->f[0];
1336 dst->f[1] = src0->f[1] * src1->f[1];
1337 dst->f[2] = src0->f[2] * src1->f[2];
1338 dst->f[3] = src0->f[3] * src1->f[3];
1339 }
1340
1341 static void
micro_neg(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)1342 micro_neg(
1343 union tgsi_exec_channel *dst,
1344 const union tgsi_exec_channel *src )
1345 {
1346 dst->f[0] = -src->f[0];
1347 dst->f[1] = -src->f[1];
1348 dst->f[2] = -src->f[2];
1349 dst->f[3] = -src->f[3];
1350 }
1351
1352 static void
micro_pow(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1353 micro_pow(
1354 union tgsi_exec_channel *dst,
1355 const union tgsi_exec_channel *src0,
1356 const union tgsi_exec_channel *src1 )
1357 {
1358 dst->f[0] = powf( src0->f[0], src1->f[0] );
1359 dst->f[1] = powf( src0->f[1], src1->f[1] );
1360 dst->f[2] = powf( src0->f[2], src1->f[2] );
1361 dst->f[3] = powf( src0->f[3], src1->f[3] );
1362 }
1363
1364 static void
micro_ldexp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1365 micro_ldexp(union tgsi_exec_channel *dst,
1366 const union tgsi_exec_channel *src0,
1367 const union tgsi_exec_channel *src1)
1368 {
1369 dst->f[0] = ldexpf(src0->f[0], src1->i[0]);
1370 dst->f[1] = ldexpf(src0->f[1], src1->i[1]);
1371 dst->f[2] = ldexpf(src0->f[2], src1->i[2]);
1372 dst->f[3] = ldexpf(src0->f[3], src1->i[3]);
1373 }
1374
1375 static void
micro_sub(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)1376 micro_sub(union tgsi_exec_channel *dst,
1377 const union tgsi_exec_channel *src0,
1378 const union tgsi_exec_channel *src1)
1379 {
1380 dst->f[0] = src0->f[0] - src1->f[0];
1381 dst->f[1] = src0->f[1] - src1->f[1];
1382 dst->f[2] = src0->f[2] - src1->f[2];
1383 dst->f[3] = src0->f[3] - src1->f[3];
1384 }
1385
1386 static void
fetch_src_file_channel(const struct tgsi_exec_machine * mach,const unsigned file,const unsigned swizzle,const union tgsi_exec_channel * index,const union tgsi_exec_channel * index2D,union tgsi_exec_channel * chan)1387 fetch_src_file_channel(const struct tgsi_exec_machine *mach,
1388 const unsigned file,
1389 const unsigned swizzle,
1390 const union tgsi_exec_channel *index,
1391 const union tgsi_exec_channel *index2D,
1392 union tgsi_exec_channel *chan)
1393 {
1394 unsigned i;
1395
1396 assert(swizzle < 4);
1397
1398 switch (file) {
1399 case TGSI_FILE_CONSTANT:
1400 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1401 /* NOTE: copying the const value as a unsigned instead of float */
1402 const unsigned constbuf = index2D->i[i];
1403 const unsigned pos = index->i[i] * 4 + swizzle;
1404 /* const buffer bounds check */
1405 if (pos >= mach->ConstsSize[constbuf] / 4) {
1406 if (0) {
1407 /* Debug: print warning */
1408 static int count = 0;
1409 if (count++ < 100)
1410 debug_printf("TGSI Exec: const buffer index %d"
1411 " out of bounds\n", pos);
1412 }
1413 chan->u[i] = 0;
1414 } else {
1415 const unsigned *buf = (const unsigned *)mach->Consts[constbuf];
1416 chan->u[i] = buf[pos];
1417 }
1418 }
1419 break;
1420
1421 case TGSI_FILE_INPUT:
1422 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1423 /*
1424 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1425 debug_printf("Fetching Input[%d] (2d=%d, 1d=%d)\n",
1426 index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i],
1427 index2D->i[i], index->i[i]);
1428 }*/
1429 int pos = index2D->i[i] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index->i[i];
1430 assert(pos >= 0);
1431 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
1432 chan->u[i] = mach->Inputs[pos].xyzw[swizzle].u[i];
1433 }
1434 break;
1435
1436 case TGSI_FILE_SYSTEM_VALUE:
1437 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1438 chan->u[i] = mach->SystemValue[index->i[i]].xyzw[swizzle].u[i];
1439 }
1440 break;
1441
1442 case TGSI_FILE_TEMPORARY:
1443 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1444 assert(index->i[i] < TGSI_EXEC_NUM_TEMPS);
1445 assert(index2D->i[i] == 0);
1446
1447 chan->u[i] = mach->Temps[index->i[i]].xyzw[swizzle].u[i];
1448 }
1449 break;
1450
1451 case TGSI_FILE_IMMEDIATE:
1452 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1453 assert(index->i[i] >= 0 && index->i[i] < (int)mach->ImmLimit);
1454 assert(index2D->i[i] == 0);
1455
1456 chan->f[i] = mach->Imms[index->i[i]][swizzle];
1457 }
1458 break;
1459
1460 case TGSI_FILE_ADDRESS:
1461 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1462 assert(index->i[i] >= 0 && index->i[i] < ARRAY_SIZE(mach->Addrs));
1463 assert(index2D->i[i] == 0);
1464
1465 chan->u[i] = mach->Addrs[index->i[i]].xyzw[swizzle].u[i];
1466 }
1467 break;
1468
1469 case TGSI_FILE_OUTPUT:
1470 /* vertex/fragment output vars can be read too */
1471 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1472 assert(index->i[i] >= 0);
1473 assert(index2D->i[i] == 0);
1474
1475 chan->u[i] = mach->Outputs[index->i[i]].xyzw[swizzle].u[i];
1476 }
1477 break;
1478
1479 default:
1480 assert(0);
1481 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
1482 chan->u[i] = 0;
1483 }
1484 }
1485 }
1486
1487 static void
get_index_registers(const struct tgsi_exec_machine * mach,const struct tgsi_full_src_register * reg,union tgsi_exec_channel * index,union tgsi_exec_channel * index2D)1488 get_index_registers(const struct tgsi_exec_machine *mach,
1489 const struct tgsi_full_src_register *reg,
1490 union tgsi_exec_channel *index,
1491 union tgsi_exec_channel *index2D)
1492 {
1493 /* We start with a direct index into a register file.
1494 *
1495 * file[1],
1496 * where:
1497 * file = Register.File
1498 * [1] = Register.Index
1499 */
1500 index->i[0] =
1501 index->i[1] =
1502 index->i[2] =
1503 index->i[3] = reg->Register.Index;
1504
1505 /* There is an extra source register that indirectly subscripts
1506 * a register file. The direct index now becomes an offset
1507 * that is being added to the indirect register.
1508 *
1509 * file[ind[2].x+1],
1510 * where:
1511 * ind = Indirect.File
1512 * [2] = Indirect.Index
1513 * .x = Indirect.SwizzleX
1514 */
1515 if (reg->Register.Indirect) {
1516 const unsigned execmask = mach->ExecMask;
1517
1518 assert(reg->Indirect.File == TGSI_FILE_ADDRESS);
1519 const union tgsi_exec_channel *addr = &mach->Addrs[reg->Indirect.Index].xyzw[reg->Indirect.Swizzle];
1520 for (int i = 0; i < TGSI_QUAD_SIZE; i++)
1521 index->i[i] += addr->u[i];
1522
1523 /* for disabled execution channels, zero-out the index to
1524 * avoid using a potential garbage value.
1525 */
1526 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
1527 if ((execmask & (1 << i)) == 0)
1528 index->i[i] = 0;
1529 }
1530 }
1531
1532 /* There is an extra source register that is a second
1533 * subscript to a register file. Effectively it means that
1534 * the register file is actually a 2D array of registers.
1535 *
1536 * file[3][1],
1537 * where:
1538 * [3] = Dimension.Index
1539 */
1540 if (reg->Register.Dimension) {
1541 index2D->i[0] =
1542 index2D->i[1] =
1543 index2D->i[2] =
1544 index2D->i[3] = reg->Dimension.Index;
1545
1546 /* Again, the second subscript index can be addressed indirectly
1547 * identically to the first one.
1548 * Nothing stops us from indirectly addressing the indirect register,
1549 * but there is no need for that, so we won't exercise it.
1550 *
1551 * file[ind[4].y+3][1],
1552 * where:
1553 * ind = DimIndirect.File
1554 * [4] = DimIndirect.Index
1555 * .y = DimIndirect.SwizzleX
1556 */
1557 if (reg->Dimension.Indirect) {
1558 const unsigned execmask = mach->ExecMask;
1559
1560 assert(reg->DimIndirect.File == TGSI_FILE_ADDRESS);
1561 const union tgsi_exec_channel *addr = &mach->Addrs[reg->DimIndirect.Index].xyzw[reg->DimIndirect.Swizzle];
1562 for (int i = 0; i < TGSI_QUAD_SIZE; i++)
1563 index2D->i[i] += addr->u[i];
1564
1565 /* for disabled execution channels, zero-out the index to
1566 * avoid using a potential garbage value.
1567 */
1568 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
1569 if ((execmask & (1 << i)) == 0) {
1570 index2D->i[i] = 0;
1571 }
1572 }
1573 }
1574
1575 /* If by any chance there was a need for a 3D array of register
1576 * files, we would have to check whether Dimension is followed
1577 * by a dimension register and continue the saga.
1578 */
1579 } else {
1580 index2D->i[0] =
1581 index2D->i[1] =
1582 index2D->i[2] =
1583 index2D->i[3] = 0;
1584 }
1585 }
1586
1587
1588 static void
fetch_source_d(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const unsigned chan_index)1589 fetch_source_d(const struct tgsi_exec_machine *mach,
1590 union tgsi_exec_channel *chan,
1591 const struct tgsi_full_src_register *reg,
1592 const unsigned chan_index)
1593 {
1594 union tgsi_exec_channel index;
1595 union tgsi_exec_channel index2D;
1596 unsigned swizzle;
1597
1598 get_index_registers(mach, reg, &index, &index2D);
1599
1600
1601 swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index );
1602 fetch_src_file_channel(mach,
1603 reg->Register.File,
1604 swizzle,
1605 &index,
1606 &index2D,
1607 chan);
1608 }
1609
1610 static void
fetch_source(const struct tgsi_exec_machine * mach,union tgsi_exec_channel * chan,const struct tgsi_full_src_register * reg,const unsigned chan_index,enum tgsi_exec_datatype src_datatype)1611 fetch_source(const struct tgsi_exec_machine *mach,
1612 union tgsi_exec_channel *chan,
1613 const struct tgsi_full_src_register *reg,
1614 const unsigned chan_index,
1615 enum tgsi_exec_datatype src_datatype)
1616 {
1617 fetch_source_d(mach, chan, reg, chan_index);
1618
1619 if (reg->Register.Absolute) {
1620 assert(src_datatype == TGSI_EXEC_DATA_FLOAT);
1621 micro_abs(chan, chan);
1622 }
1623
1624 if (reg->Register.Negate) {
1625 if (src_datatype == TGSI_EXEC_DATA_FLOAT) {
1626 micro_neg(chan, chan);
1627 } else {
1628 micro_ineg(chan, chan);
1629 }
1630 }
1631 }
1632
1633 static union tgsi_exec_channel *
store_dest_dstret(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,unsigned chan_index)1634 store_dest_dstret(struct tgsi_exec_machine *mach,
1635 const union tgsi_exec_channel *chan,
1636 const struct tgsi_full_dst_register *reg,
1637 unsigned chan_index)
1638 {
1639 static union tgsi_exec_channel null;
1640 union tgsi_exec_channel *dst;
1641 int offset = 0; /* indirection offset */
1642 int index;
1643
1644
1645 /* There is an extra source register that indirectly subscripts
1646 * a register file. The direct index now becomes an offset
1647 * that is being added to the indirect register.
1648 *
1649 * file[ind[2].x+1],
1650 * where:
1651 * ind = Indirect.File
1652 * [2] = Indirect.Index
1653 * .x = Indirect.SwizzleX
1654 */
1655 if (reg->Register.Indirect) {
1656 union tgsi_exec_channel index;
1657 union tgsi_exec_channel indir_index;
1658 unsigned swizzle;
1659
1660 /* which address register (always zero for now) */
1661 index.i[0] =
1662 index.i[1] =
1663 index.i[2] =
1664 index.i[3] = reg->Indirect.Index;
1665
1666 /* get current value of address register[swizzle] */
1667 swizzle = reg->Indirect.Swizzle;
1668
1669 /* fetch values from the address/indirection register */
1670 fetch_src_file_channel(mach,
1671 reg->Indirect.File,
1672 swizzle,
1673 &index,
1674 &ZeroVec,
1675 &indir_index);
1676
1677 /* save indirection offset */
1678 offset = indir_index.i[0];
1679 }
1680
1681 switch (reg->Register.File) {
1682 case TGSI_FILE_NULL:
1683 dst = &null;
1684 break;
1685
1686 case TGSI_FILE_OUTPUT:
1687 index = mach->OutputVertexOffset + reg->Register.Index;
1688 dst = &mach->Outputs[offset + index].xyzw[chan_index];
1689 #if 0
1690 debug_printf("NumOutputs = %d, TEMP_O_C/I = %d, redindex = %d\n",
1691 mach->NumOutputs, mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0],
1692 reg->Register.Index);
1693 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1694 debug_printf("STORING OUT[%d] mask(%d), = (", offset + index, execmask);
1695 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1696 if (execmask & (1 << i))
1697 debug_printf("%f, ", chan->f[i]);
1698 debug_printf(")\n");
1699 }
1700 #endif
1701 break;
1702
1703 case TGSI_FILE_TEMPORARY:
1704 index = reg->Register.Index;
1705 assert( index < TGSI_EXEC_NUM_TEMPS );
1706 dst = &mach->Temps[offset + index].xyzw[chan_index];
1707 break;
1708
1709 case TGSI_FILE_ADDRESS:
1710 index = reg->Register.Index;
1711 assert(index >= 0 && index < ARRAY_SIZE(mach->Addrs));
1712 dst = &mach->Addrs[index].xyzw[chan_index];
1713 break;
1714
1715 default:
1716 unreachable("Bad destination file");
1717 }
1718
1719 return dst;
1720 }
1721
1722 static void
store_dest_double(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,unsigned chan_index)1723 store_dest_double(struct tgsi_exec_machine *mach,
1724 const union tgsi_exec_channel *chan,
1725 const struct tgsi_full_dst_register *reg,
1726 unsigned chan_index)
1727 {
1728 union tgsi_exec_channel *dst;
1729 const unsigned execmask = mach->ExecMask;
1730 int i;
1731
1732 dst = store_dest_dstret(mach, chan, reg, chan_index);
1733 if (!dst)
1734 return;
1735
1736 /* doubles path */
1737 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1738 if (execmask & (1 << i))
1739 dst->i[i] = chan->i[i];
1740 }
1741
1742 static void
store_dest(struct tgsi_exec_machine * mach,const union tgsi_exec_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,unsigned chan_index)1743 store_dest(struct tgsi_exec_machine *mach,
1744 const union tgsi_exec_channel *chan,
1745 const struct tgsi_full_dst_register *reg,
1746 const struct tgsi_full_instruction *inst,
1747 unsigned chan_index)
1748 {
1749 union tgsi_exec_channel *dst;
1750 const unsigned execmask = mach->ExecMask;
1751 int i;
1752
1753 dst = store_dest_dstret(mach, chan, reg, chan_index);
1754 if (!dst)
1755 return;
1756
1757 if (!inst->Instruction.Saturate) {
1758 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1759 if (execmask & (1 << i))
1760 dst->i[i] = chan->i[i];
1761 }
1762 else {
1763 for (i = 0; i < TGSI_QUAD_SIZE; i++)
1764 if (execmask & (1 << i))
1765 dst->f[i] = fminf(fmaxf(chan->f[i], 0.0f), 1.0f);
1766 }
1767 }
1768
/*
 * Shorthands for fetching channel CHAN of source operand INDEX of the
 * current instruction into VAL.  Both rely on variables named 'mach' and
 * 'inst' being in scope at the point of use.
 */

/* fetch as float */
#define FETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_FLOAT)

/* fetch as signed integer */
#define IFETCH(VAL, INDEX, CHAN) \
   fetch_source(mach, (VAL), &inst->Src[(INDEX)], (CHAN), TGSI_EXEC_DATA_INT)
1774
1775
1776 /**
1777 * Execute ARB-style KIL which is predicated by a src register.
1778 * Kill fragment if any of the four values is less than zero.
1779 */
1780 static void
exec_kill_if(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1781 exec_kill_if(struct tgsi_exec_machine *mach,
1782 const struct tgsi_full_instruction *inst)
1783 {
1784 unsigned uniquemask;
1785 unsigned chan_index;
1786 unsigned kilmask = 0; /* bit 0 = pixel 0, bit 1 = pixel 1, etc */
1787 union tgsi_exec_channel r[1];
1788
1789 /* This mask stores component bits that were already tested. */
1790 uniquemask = 0;
1791
1792 for (chan_index = 0; chan_index < 4; chan_index++)
1793 {
1794 unsigned swizzle;
1795 unsigned i;
1796
1797 /* unswizzle channel */
1798 swizzle = tgsi_util_get_full_src_register_swizzle (
1799 &inst->Src[0],
1800 chan_index);
1801
1802 /* check if the component has not been already tested */
1803 if (uniquemask & (1 << swizzle))
1804 continue;
1805 uniquemask |= 1 << swizzle;
1806
1807 FETCH(&r[0], 0, chan_index);
1808 for (i = 0; i < 4; i++)
1809 if (r[0].f[i] < 0.0f)
1810 kilmask |= 1 << i;
1811 }
1812
1813 /* restrict to fragments currently executing */
1814 kilmask &= mach->ExecMask;
1815
1816 mach->KillMask |= kilmask;
1817 }
1818
1819 /**
1820 * Unconditional fragment kill/discard.
1821 */
1822 static void
exec_kill(struct tgsi_exec_machine * mach)1823 exec_kill(struct tgsi_exec_machine *mach)
1824 {
1825 /* kill fragment for all fragments currently executing.
1826 * bit 0 = pixel 0, bit 1 = pixel 1, etc.
1827 */
1828 mach->KillMask |= mach->ExecMask;
1829 }
1830
1831 static void
emit_vertex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1832 emit_vertex(struct tgsi_exec_machine *mach,
1833 const struct tgsi_full_instruction *inst)
1834 {
1835 union tgsi_exec_channel r[1];
1836 unsigned stream_id;
1837 unsigned prim_count;
1838 /* FIXME: check for exec mask correctly
1839 unsigned i;
1840 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1841 if ((mach->ExecMask & (1 << i)))
1842 */
1843 IFETCH(&r[0], 0, TGSI_CHAN_X);
1844 stream_id = r[0].u[0];
1845 prim_count = mach->OutputPrimCount[stream_id];
1846 if (mach->ExecMask) {
1847 if (mach->Primitives[stream_id][prim_count] >= mach->MaxOutputVertices)
1848 return;
1849
1850 if (mach->Primitives[stream_id][prim_count] == 0)
1851 mach->PrimitiveOffsets[stream_id][prim_count] = mach->OutputVertexOffset;
1852 mach->OutputVertexOffset += mach->NumOutputs;
1853 mach->Primitives[stream_id][prim_count]++;
1854 }
1855 }
1856
1857 static void
emit_primitive(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)1858 emit_primitive(struct tgsi_exec_machine *mach,
1859 const struct tgsi_full_instruction *inst)
1860 {
1861 unsigned *prim_count;
1862 union tgsi_exec_channel r[1];
1863 unsigned stream_id = 0;
1864 /* FIXME: check for exec mask correctly
1865 unsigned i;
1866 for (i = 0; i < TGSI_QUAD_SIZE; ++i) {
1867 if ((mach->ExecMask & (1 << i)))
1868 */
1869 if (inst) {
1870 IFETCH(&r[0], 0, TGSI_CHAN_X);
1871 stream_id = r[0].u[0];
1872 }
1873 prim_count = &mach->OutputPrimCount[stream_id];
1874 if (mach->ExecMask) {
1875 ++(*prim_count);
1876 assert((*prim_count * mach->NumOutputs) < TGSI_MAX_TOTAL_VERTICES);
1877 mach->Primitives[stream_id][*prim_count] = 0;
1878 }
1879 }
1880
1881 static void
conditional_emit_primitive(struct tgsi_exec_machine * mach)1882 conditional_emit_primitive(struct tgsi_exec_machine *mach)
1883 {
1884 if (PIPE_SHADER_GEOMETRY == mach->ShaderType) {
1885 int emitted_verts = mach->Primitives[0][mach->OutputPrimCount[0]];
1886 if (emitted_verts) {
1887 emit_primitive(mach, NULL);
1888 }
1889 }
1890 }
1891
1892
1893 /*
1894 * Fetch four texture samples using STR texture coordinates.
1895 */
1896 static void
fetch_texel(struct tgsi_sampler * sampler,const unsigned sview_idx,const unsigned sampler_idx,const union tgsi_exec_channel * s,const union tgsi_exec_channel * t,const union tgsi_exec_channel * p,const union tgsi_exec_channel * c0,const union tgsi_exec_channel * c1,float derivs[3][2][TGSI_QUAD_SIZE],const int8_t offset[3],enum tgsi_sampler_control control,union tgsi_exec_channel * r,union tgsi_exec_channel * g,union tgsi_exec_channel * b,union tgsi_exec_channel * a)1897 fetch_texel( struct tgsi_sampler *sampler,
1898 const unsigned sview_idx,
1899 const unsigned sampler_idx,
1900 const union tgsi_exec_channel *s,
1901 const union tgsi_exec_channel *t,
1902 const union tgsi_exec_channel *p,
1903 const union tgsi_exec_channel *c0,
1904 const union tgsi_exec_channel *c1,
1905 float derivs[3][2][TGSI_QUAD_SIZE],
1906 const int8_t offset[3],
1907 enum tgsi_sampler_control control,
1908 union tgsi_exec_channel *r,
1909 union tgsi_exec_channel *g,
1910 union tgsi_exec_channel *b,
1911 union tgsi_exec_channel *a )
1912 {
1913 unsigned j;
1914 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
1915
1916 /* FIXME: handle explicit derivs, offsets */
1917 sampler->get_samples(sampler, sview_idx, sampler_idx,
1918 s->f, t->f, p->f, c0->f, c1->f, derivs, offset, control, rgba);
1919
1920 for (j = 0; j < 4; j++) {
1921 r->f[j] = rgba[0][j];
1922 g->f[j] = rgba[1][j];
1923 b->f[j] = rgba[2][j];
1924 a->f[j] = rgba[3][j];
1925 }
1926 }
1927
1928
/*
 * Selects the texture instruction variant handled by exec_tex() and
 * exec_sample().
 */
enum tex_modifier {
   TEX_MODIFIER_NONE         = 0,  /* no modifier operand */
   TEX_MODIFIER_PROJECTED    = 1,  /* coordinates are divided by the modifier */
   TEX_MODIFIER_LOD_BIAS     = 2,  /* maps to TGSI_SAMPLER_LOD_BIAS */
   TEX_MODIFIER_EXPLICIT_LOD = 3,  /* maps to TGSI_SAMPLER_LOD_EXPLICIT */
   TEX_MODIFIER_LEVEL_ZERO   = 4,  /* maps to TGSI_SAMPLER_LOD_ZERO */
   TEX_MODIFIER_GATHER       = 5,  /* maps to TGSI_SAMPLER_GATHER */
};
1937
1938 /*
1939 * Fetch all 3 (for s,t,r coords) texel offsets, put them into int array.
1940 */
1941 static void
fetch_texel_offsets(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int8_t offsets[3])1942 fetch_texel_offsets(struct tgsi_exec_machine *mach,
1943 const struct tgsi_full_instruction *inst,
1944 int8_t offsets[3])
1945 {
1946 if (inst->Texture.NumOffsets == 1) {
1947 union tgsi_exec_channel index;
1948 union tgsi_exec_channel offset[3];
1949 index.i[0] = index.i[1] = index.i[2] = index.i[3] = inst->TexOffsets[0].Index;
1950 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
1951 inst->TexOffsets[0].SwizzleX, &index, &ZeroVec, &offset[0]);
1952 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
1953 inst->TexOffsets[0].SwizzleY, &index, &ZeroVec, &offset[1]);
1954 fetch_src_file_channel(mach, inst->TexOffsets[0].File,
1955 inst->TexOffsets[0].SwizzleZ, &index, &ZeroVec, &offset[2]);
1956 offsets[0] = offset[0].i[0];
1957 offsets[1] = offset[1].i[0];
1958 offsets[2] = offset[2].i[0];
1959 } else {
1960 assert(inst->Texture.NumOffsets == 0);
1961 offsets[0] = offsets[1] = offsets[2] = 0;
1962 }
1963 }
1964
1965
1966 /*
1967 * Fetch dx and dy values for one channel (s, t or r).
1968 * Put dx values into one float array, dy values into another.
1969 */
1970 static void
fetch_assign_deriv_channel(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,unsigned regdsrcx,unsigned chan,float derivs[2][TGSI_QUAD_SIZE])1971 fetch_assign_deriv_channel(struct tgsi_exec_machine *mach,
1972 const struct tgsi_full_instruction *inst,
1973 unsigned regdsrcx,
1974 unsigned chan,
1975 float derivs[2][TGSI_QUAD_SIZE])
1976 {
1977 union tgsi_exec_channel d;
1978 FETCH(&d, regdsrcx, chan);
1979 derivs[0][0] = d.f[0];
1980 derivs[0][1] = d.f[1];
1981 derivs[0][2] = d.f[2];
1982 derivs[0][3] = d.f[3];
1983 FETCH(&d, regdsrcx + 1, chan);
1984 derivs[1][0] = d.f[0];
1985 derivs[1][1] = d.f[1];
1986 derivs[1][2] = d.f[2];
1987 derivs[1][3] = d.f[3];
1988 }
1989
1990 static unsigned
fetch_sampler_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,unsigned sampler)1991 fetch_sampler_unit(struct tgsi_exec_machine *mach,
1992 const struct tgsi_full_instruction *inst,
1993 unsigned sampler)
1994 {
1995 unsigned unit = 0;
1996 int i;
1997 if (inst->Src[sampler].Register.Indirect) {
1998 const struct tgsi_full_src_register *reg = &inst->Src[sampler];
1999 union tgsi_exec_channel indir_index, index2;
2000 const unsigned execmask = mach->ExecMask;
2001 index2.i[0] =
2002 index2.i[1] =
2003 index2.i[2] =
2004 index2.i[3] = reg->Indirect.Index;
2005
2006 fetch_src_file_channel(mach,
2007 reg->Indirect.File,
2008 reg->Indirect.Swizzle,
2009 &index2,
2010 &ZeroVec,
2011 &indir_index);
2012 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2013 if (execmask & (1 << i)) {
2014 unit = inst->Src[sampler].Register.Index + indir_index.i[i];
2015 break;
2016 }
2017 }
2018
2019 } else {
2020 unit = inst->Src[sampler].Register.Index;
2021 }
2022 return unit;
2023 }
2024
2025 /*
2026 * execute a texture instruction.
2027 *
2028 * modifier is used to control the channel routing for the
2029 * instruction variants like proj, lod, and texture with lod bias.
2030 * sampler indicates which src register the sampler is contained in.
2031 */
2032 static void
exec_tex(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,enum tex_modifier modifier,unsigned sampler)2033 exec_tex(struct tgsi_exec_machine *mach,
2034 const struct tgsi_full_instruction *inst,
2035 enum tex_modifier modifier, unsigned sampler)
2036 {
2037 const union tgsi_exec_channel *args[5], *proj = NULL;
2038 union tgsi_exec_channel r[5];
2039 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2040 unsigned chan;
2041 unsigned unit;
2042 int8_t offsets[3];
2043 int dim, shadow_ref, i;
2044
2045 unit = fetch_sampler_unit(mach, inst, sampler);
2046 /* always fetch all 3 offsets, overkill but keeps code simple */
2047 fetch_texel_offsets(mach, inst, offsets);
2048
2049 assert(modifier != TEX_MODIFIER_LEVEL_ZERO);
2050 assert(inst->Texture.Texture != TGSI_TEXTURE_BUFFER);
2051
2052 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2053 shadow_ref = tgsi_util_get_shadow_ref_src_index(inst->Texture.Texture);
2054
2055 assert(dim <= 4);
2056 if (shadow_ref >= 0)
2057 assert(shadow_ref >= dim && shadow_ref < (int)ARRAY_SIZE(args));
2058
2059 /* fetch modifier to the last argument */
2060 if (modifier != TEX_MODIFIER_NONE) {
2061 const int last = ARRAY_SIZE(args) - 1;
2062
2063 /* fetch modifier from src0.w or src1.x */
2064 if (sampler == 1) {
2065 assert(dim <= TGSI_CHAN_W && shadow_ref != TGSI_CHAN_W);
2066 FETCH(&r[last], 0, TGSI_CHAN_W);
2067 }
2068 else {
2069 FETCH(&r[last], 1, TGSI_CHAN_X);
2070 }
2071
2072 if (modifier != TEX_MODIFIER_PROJECTED) {
2073 args[last] = &r[last];
2074 }
2075 else {
2076 proj = &r[last];
2077 args[last] = &ZeroVec;
2078 }
2079
2080 /* point unused arguments to zero vector */
2081 for (i = dim; i < last; i++)
2082 args[i] = &ZeroVec;
2083
2084 if (modifier == TEX_MODIFIER_EXPLICIT_LOD)
2085 control = TGSI_SAMPLER_LOD_EXPLICIT;
2086 else if (modifier == TEX_MODIFIER_LOD_BIAS)
2087 control = TGSI_SAMPLER_LOD_BIAS;
2088 else if (modifier == TEX_MODIFIER_GATHER)
2089 control = TGSI_SAMPLER_GATHER;
2090 }
2091 else {
2092 for (i = dim; i < (int)ARRAY_SIZE(args); i++)
2093 args[i] = &ZeroVec;
2094 }
2095
2096 /* fetch coordinates */
2097 for (i = 0; i < dim; i++) {
2098 FETCH(&r[i], 0, TGSI_CHAN_X + i);
2099
2100 if (proj)
2101 micro_div(&r[i], &r[i], proj);
2102
2103 args[i] = &r[i];
2104 }
2105
2106 /* fetch reference value */
2107 if (shadow_ref >= 0) {
2108 FETCH(&r[shadow_ref], shadow_ref / 4, TGSI_CHAN_X + (shadow_ref % 4));
2109
2110 if (proj)
2111 micro_div(&r[shadow_ref], &r[shadow_ref], proj);
2112
2113 args[shadow_ref] = &r[shadow_ref];
2114 }
2115
2116 fetch_texel(mach->Sampler, unit, unit,
2117 args[0], args[1], args[2], args[3], args[4],
2118 NULL, offsets, control,
2119 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2120
2121 #if 0
2122 debug_printf("fetch r: %g %g %g %g\n",
2123 r[0].f[0], r[0].f[1], r[0].f[2], r[0].f[3]);
2124 debug_printf("fetch g: %g %g %g %g\n",
2125 r[1].f[0], r[1].f[1], r[1].f[2], r[1].f[3]);
2126 debug_printf("fetch b: %g %g %g %g\n",
2127 r[2].f[0], r[2].f[1], r[2].f[2], r[2].f[3]);
2128 debug_printf("fetch a: %g %g %g %g\n",
2129 r[3].f[0], r[3].f[1], r[3].f[2], r[3].f[3]);
2130 #endif
2131
2132 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2133 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2134 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2135 }
2136 }
2137 }
2138
2139 static void
exec_lodq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2140 exec_lodq(struct tgsi_exec_machine *mach,
2141 const struct tgsi_full_instruction *inst)
2142 {
2143 unsigned resource_unit, sampler_unit;
2144 unsigned dim;
2145 unsigned i;
2146 union tgsi_exec_channel coords[4];
2147 const union tgsi_exec_channel *args[ARRAY_SIZE(coords)];
2148 union tgsi_exec_channel r[2];
2149
2150 resource_unit = fetch_sampler_unit(mach, inst, 1);
2151 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2152 unsigned target = mach->SamplerViews[resource_unit].Resource;
2153 dim = tgsi_util_get_texture_coord_dim(target);
2154 sampler_unit = fetch_sampler_unit(mach, inst, 2);
2155 } else {
2156 dim = tgsi_util_get_texture_coord_dim(inst->Texture.Texture);
2157 sampler_unit = resource_unit;
2158 }
2159 assert(dim <= ARRAY_SIZE(coords));
2160 /* fetch coordinates */
2161 for (i = 0; i < dim; i++) {
2162 FETCH(&coords[i], 0, TGSI_CHAN_X + i);
2163 args[i] = &coords[i];
2164 }
2165 for (i = dim; i < ARRAY_SIZE(coords); i++) {
2166 args[i] = &ZeroVec;
2167 }
2168 mach->Sampler->query_lod(mach->Sampler, resource_unit, sampler_unit,
2169 args[0]->f,
2170 args[1]->f,
2171 args[2]->f,
2172 args[3]->f,
2173 TGSI_SAMPLER_LOD_NONE,
2174 r[0].f,
2175 r[1].f);
2176
2177 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2178 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
2179 }
2180 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2181 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
2182 }
2183 if (inst->Instruction.Opcode == TGSI_OPCODE_LOD) {
2184 unsigned char swizzles[4];
2185 unsigned chan;
2186 swizzles[0] = inst->Src[1].Register.SwizzleX;
2187 swizzles[1] = inst->Src[1].Register.SwizzleY;
2188 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2189 swizzles[3] = inst->Src[1].Register.SwizzleW;
2190
2191 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2192 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2193 if (swizzles[chan] >= 2) {
2194 store_dest(mach, &ZeroVec,
2195 &inst->Dst[0], inst, chan);
2196 } else {
2197 store_dest(mach, &r[swizzles[chan]],
2198 &inst->Dst[0], inst, chan);
2199 }
2200 }
2201 }
2202 } else {
2203 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
2204 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
2205 }
2206 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
2207 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Y);
2208 }
2209 }
2210 }
2211
2212 static void
exec_txd(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2213 exec_txd(struct tgsi_exec_machine *mach,
2214 const struct tgsi_full_instruction *inst)
2215 {
2216 union tgsi_exec_channel r[4];
2217 float derivs[3][2][TGSI_QUAD_SIZE];
2218 unsigned chan;
2219 unsigned unit;
2220 int8_t offsets[3];
2221
2222 unit = fetch_sampler_unit(mach, inst, 3);
2223 /* always fetch all 3 offsets, overkill but keeps code simple */
2224 fetch_texel_offsets(mach, inst, offsets);
2225
2226 switch (inst->Texture.Texture) {
2227 case TGSI_TEXTURE_1D:
2228 FETCH(&r[0], 0, TGSI_CHAN_X);
2229
2230 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2231
2232 fetch_texel(mach->Sampler, unit, unit,
2233 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2234 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2235 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2236 break;
2237
2238 case TGSI_TEXTURE_SHADOW1D:
2239 case TGSI_TEXTURE_1D_ARRAY:
2240 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2241 /* SHADOW1D/1D_ARRAY would not need Y/Z respectively, but don't bother */
2242 FETCH(&r[0], 0, TGSI_CHAN_X);
2243 FETCH(&r[1], 0, TGSI_CHAN_Y);
2244 FETCH(&r[2], 0, TGSI_CHAN_Z);
2245
2246 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2247
2248 fetch_texel(mach->Sampler, unit, unit,
2249 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2250 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2251 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2252 break;
2253
2254 case TGSI_TEXTURE_2D:
2255 case TGSI_TEXTURE_RECT:
2256 FETCH(&r[0], 0, TGSI_CHAN_X);
2257 FETCH(&r[1], 0, TGSI_CHAN_Y);
2258
2259 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2260 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2261
2262 fetch_texel(mach->Sampler, unit, unit,
2263 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2264 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2265 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2266 break;
2267
2268
2269 case TGSI_TEXTURE_SHADOW2D:
2270 case TGSI_TEXTURE_SHADOWRECT:
2271 case TGSI_TEXTURE_2D_ARRAY:
2272 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2273 /* only SHADOW2D_ARRAY actually needs W */
2274 FETCH(&r[0], 0, TGSI_CHAN_X);
2275 FETCH(&r[1], 0, TGSI_CHAN_Y);
2276 FETCH(&r[2], 0, TGSI_CHAN_Z);
2277 FETCH(&r[3], 0, TGSI_CHAN_W);
2278
2279 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2280 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2281
2282 fetch_texel(mach->Sampler, unit, unit,
2283 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2284 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2285 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2286 break;
2287
2288 case TGSI_TEXTURE_3D:
2289 case TGSI_TEXTURE_CUBE:
2290 case TGSI_TEXTURE_CUBE_ARRAY:
2291 case TGSI_TEXTURE_SHADOWCUBE:
2292 /* only TEXTURE_CUBE_ARRAY and TEXTURE_SHADOWCUBE actually need W */
2293 FETCH(&r[0], 0, TGSI_CHAN_X);
2294 FETCH(&r[1], 0, TGSI_CHAN_Y);
2295 FETCH(&r[2], 0, TGSI_CHAN_Z);
2296 FETCH(&r[3], 0, TGSI_CHAN_W);
2297
2298 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_X, derivs[0]);
2299 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Y, derivs[1]);
2300 fetch_assign_deriv_channel(mach, inst, 1, TGSI_CHAN_Z, derivs[2]);
2301
2302 fetch_texel(mach->Sampler, unit, unit,
2303 &r[0], &r[1], &r[2], &r[3], &ZeroVec, /* inputs */
2304 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2305 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2306 break;
2307
2308 default:
2309 assert(0);
2310 }
2311
2312 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2313 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2314 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2315 }
2316 }
2317 }
2318
2319
2320 static void
exec_txf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2321 exec_txf(struct tgsi_exec_machine *mach,
2322 const struct tgsi_full_instruction *inst)
2323 {
2324 union tgsi_exec_channel r[4];
2325 unsigned chan;
2326 unsigned unit;
2327 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
2328 int j;
2329 int8_t offsets[3];
2330 unsigned target;
2331
2332 unit = fetch_sampler_unit(mach, inst, 1);
2333 /* always fetch all 3 offsets, overkill but keeps code simple */
2334 fetch_texel_offsets(mach, inst, offsets);
2335
2336 IFETCH(&r[3], 0, TGSI_CHAN_W);
2337
2338 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2339 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2340 target = mach->SamplerViews[unit].Resource;
2341 }
2342 else {
2343 target = inst->Texture.Texture;
2344 }
2345 switch(target) {
2346 case TGSI_TEXTURE_3D:
2347 case TGSI_TEXTURE_2D_ARRAY:
2348 case TGSI_TEXTURE_SHADOW2D_ARRAY:
2349 case TGSI_TEXTURE_2D_ARRAY_MSAA:
2350 IFETCH(&r[2], 0, TGSI_CHAN_Z);
2351 FALLTHROUGH;
2352 case TGSI_TEXTURE_2D:
2353 case TGSI_TEXTURE_RECT:
2354 case TGSI_TEXTURE_SHADOW1D_ARRAY:
2355 case TGSI_TEXTURE_SHADOW2D:
2356 case TGSI_TEXTURE_SHADOWRECT:
2357 case TGSI_TEXTURE_1D_ARRAY:
2358 case TGSI_TEXTURE_2D_MSAA:
2359 IFETCH(&r[1], 0, TGSI_CHAN_Y);
2360 FALLTHROUGH;
2361 case TGSI_TEXTURE_BUFFER:
2362 case TGSI_TEXTURE_1D:
2363 case TGSI_TEXTURE_SHADOW1D:
2364 IFETCH(&r[0], 0, TGSI_CHAN_X);
2365 break;
2366 default:
2367 assert(0);
2368 break;
2369 }
2370
2371 mach->Sampler->get_texel(mach->Sampler, unit, r[0].i, r[1].i, r[2].i, r[3].i,
2372 offsets, rgba);
2373
2374 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
2375 r[0].f[j] = rgba[0][j];
2376 r[1].f[j] = rgba[1][j];
2377 r[2].f[j] = rgba[2][j];
2378 r[3].f[j] = rgba[3][j];
2379 }
2380
2381 if (inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I ||
2382 inst->Instruction.Opcode == TGSI_OPCODE_SAMPLE_I_MS) {
2383 unsigned char swizzles[4];
2384 swizzles[0] = inst->Src[1].Register.SwizzleX;
2385 swizzles[1] = inst->Src[1].Register.SwizzleY;
2386 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2387 swizzles[3] = inst->Src[1].Register.SwizzleW;
2388
2389 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2390 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2391 store_dest(mach, &r[swizzles[chan]],
2392 &inst->Dst[0], inst, chan);
2393 }
2394 }
2395 }
2396 else {
2397 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2398 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2399 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2400 }
2401 }
2402 }
2403 }
2404
2405 static void
exec_txq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2406 exec_txq(struct tgsi_exec_machine *mach,
2407 const struct tgsi_full_instruction *inst)
2408 {
2409 int result[4];
2410 union tgsi_exec_channel r[4], src;
2411 unsigned chan;
2412 unsigned unit;
2413 int i,j;
2414
2415 unit = fetch_sampler_unit(mach, inst, 1);
2416
2417 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
2418
2419 /* XXX: This interface can't return per-pixel values */
2420 mach->Sampler->get_dims(mach->Sampler, unit, src.i[0], result);
2421
2422 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2423 for (j = 0; j < 4; j++) {
2424 r[j].i[i] = result[j];
2425 }
2426 }
2427
2428 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2429 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2430 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
2431 }
2432 }
2433 }
2434
2435 static void
exec_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,enum tex_modifier modifier,bool compare)2436 exec_sample(struct tgsi_exec_machine *mach,
2437 const struct tgsi_full_instruction *inst,
2438 enum tex_modifier modifier, bool compare)
2439 {
2440 const unsigned resource_unit = inst->Src[1].Register.Index;
2441 const unsigned sampler_unit = inst->Src[2].Register.Index;
2442 union tgsi_exec_channel r[5], c1;
2443 const union tgsi_exec_channel *lod = &ZeroVec;
2444 enum tgsi_sampler_control control = TGSI_SAMPLER_LOD_NONE;
2445 unsigned chan;
2446 unsigned char swizzles[4];
2447 int8_t offsets[3];
2448
2449 /* always fetch all 3 offsets, overkill but keeps code simple */
2450 fetch_texel_offsets(mach, inst, offsets);
2451
2452 assert(modifier != TEX_MODIFIER_PROJECTED);
2453
2454 if (modifier != TEX_MODIFIER_NONE) {
2455 if (modifier == TEX_MODIFIER_LOD_BIAS) {
2456 FETCH(&c1, 3, TGSI_CHAN_X);
2457 lod = &c1;
2458 control = TGSI_SAMPLER_LOD_BIAS;
2459 }
2460 else if (modifier == TEX_MODIFIER_EXPLICIT_LOD) {
2461 FETCH(&c1, 3, TGSI_CHAN_X);
2462 lod = &c1;
2463 control = TGSI_SAMPLER_LOD_EXPLICIT;
2464 }
2465 else if (modifier == TEX_MODIFIER_GATHER) {
2466 control = TGSI_SAMPLER_GATHER;
2467 }
2468 else {
2469 assert(modifier == TEX_MODIFIER_LEVEL_ZERO);
2470 control = TGSI_SAMPLER_LOD_ZERO;
2471 }
2472 }
2473
2474 FETCH(&r[0], 0, TGSI_CHAN_X);
2475
2476 switch (mach->SamplerViews[resource_unit].Resource) {
2477 case TGSI_TEXTURE_1D:
2478 if (compare) {
2479 FETCH(&r[2], 3, TGSI_CHAN_X);
2480 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2481 &r[0], &ZeroVec, &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2482 NULL, offsets, control,
2483 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2484 }
2485 else {
2486 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2487 &r[0], &ZeroVec, &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2488 NULL, offsets, control,
2489 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2490 }
2491 break;
2492
2493 case TGSI_TEXTURE_1D_ARRAY:
2494 case TGSI_TEXTURE_2D:
2495 case TGSI_TEXTURE_RECT:
2496 FETCH(&r[1], 0, TGSI_CHAN_Y);
2497 if (compare) {
2498 FETCH(&r[2], 3, TGSI_CHAN_X);
2499 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2500 &r[0], &r[1], &r[2], &ZeroVec, lod, /* S, T, P, C, LOD */
2501 NULL, offsets, control,
2502 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2503 }
2504 else {
2505 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2506 &r[0], &r[1], &ZeroVec, &ZeroVec, lod, /* S, T, P, C, LOD */
2507 NULL, offsets, control,
2508 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2509 }
2510 break;
2511
2512 case TGSI_TEXTURE_2D_ARRAY:
2513 case TGSI_TEXTURE_3D:
2514 case TGSI_TEXTURE_CUBE:
2515 FETCH(&r[1], 0, TGSI_CHAN_Y);
2516 FETCH(&r[2], 0, TGSI_CHAN_Z);
2517 if(compare) {
2518 FETCH(&r[3], 3, TGSI_CHAN_X);
2519 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2520 &r[0], &r[1], &r[2], &r[3], lod,
2521 NULL, offsets, control,
2522 &r[0], &r[1], &r[2], &r[3]);
2523 }
2524 else {
2525 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2526 &r[0], &r[1], &r[2], &ZeroVec, lod,
2527 NULL, offsets, control,
2528 &r[0], &r[1], &r[2], &r[3]);
2529 }
2530 break;
2531
2532 case TGSI_TEXTURE_CUBE_ARRAY:
2533 FETCH(&r[1], 0, TGSI_CHAN_Y);
2534 FETCH(&r[2], 0, TGSI_CHAN_Z);
2535 FETCH(&r[3], 0, TGSI_CHAN_W);
2536 if(compare) {
2537 FETCH(&r[4], 3, TGSI_CHAN_X);
2538 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2539 &r[0], &r[1], &r[2], &r[3], &r[4],
2540 NULL, offsets, control,
2541 &r[0], &r[1], &r[2], &r[3]);
2542 }
2543 else {
2544 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2545 &r[0], &r[1], &r[2], &r[3], lod,
2546 NULL, offsets, control,
2547 &r[0], &r[1], &r[2], &r[3]);
2548 }
2549 break;
2550
2551
2552 default:
2553 assert(0);
2554 }
2555
2556 swizzles[0] = inst->Src[1].Register.SwizzleX;
2557 swizzles[1] = inst->Src[1].Register.SwizzleY;
2558 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2559 swizzles[3] = inst->Src[1].Register.SwizzleW;
2560
2561 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2562 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2563 store_dest(mach, &r[swizzles[chan]],
2564 &inst->Dst[0], inst, chan);
2565 }
2566 }
2567 }
2568
2569 static void
exec_sample_d(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)2570 exec_sample_d(struct tgsi_exec_machine *mach,
2571 const struct tgsi_full_instruction *inst)
2572 {
2573 const unsigned resource_unit = inst->Src[1].Register.Index;
2574 const unsigned sampler_unit = inst->Src[2].Register.Index;
2575 union tgsi_exec_channel r[4];
2576 float derivs[3][2][TGSI_QUAD_SIZE];
2577 unsigned chan;
2578 unsigned char swizzles[4];
2579 int8_t offsets[3];
2580
2581 /* always fetch all 3 offsets, overkill but keeps code simple */
2582 fetch_texel_offsets(mach, inst, offsets);
2583
2584 FETCH(&r[0], 0, TGSI_CHAN_X);
2585
2586 switch (mach->SamplerViews[resource_unit].Resource) {
2587 case TGSI_TEXTURE_1D:
2588 case TGSI_TEXTURE_1D_ARRAY:
2589 /* only 1D array actually needs Y */
2590 FETCH(&r[1], 0, TGSI_CHAN_Y);
2591
2592 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2593
2594 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2595 &r[0], &r[1], &ZeroVec, &ZeroVec, &ZeroVec, /* S, T, P, C, LOD */
2596 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2597 &r[0], &r[1], &r[2], &r[3]); /* R, G, B, A */
2598 break;
2599
2600 case TGSI_TEXTURE_2D:
2601 case TGSI_TEXTURE_RECT:
2602 case TGSI_TEXTURE_2D_ARRAY:
2603 /* only 2D array actually needs Z */
2604 FETCH(&r[1], 0, TGSI_CHAN_Y);
2605 FETCH(&r[2], 0, TGSI_CHAN_Z);
2606
2607 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2608 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2609
2610 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2611 &r[0], &r[1], &r[2], &ZeroVec, &ZeroVec, /* inputs */
2612 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2613 &r[0], &r[1], &r[2], &r[3]); /* outputs */
2614 break;
2615
2616 case TGSI_TEXTURE_3D:
2617 case TGSI_TEXTURE_CUBE:
2618 case TGSI_TEXTURE_CUBE_ARRAY:
2619 /* only cube array actually needs W */
2620 FETCH(&r[1], 0, TGSI_CHAN_Y);
2621 FETCH(&r[2], 0, TGSI_CHAN_Z);
2622 FETCH(&r[3], 0, TGSI_CHAN_W);
2623
2624 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_X, derivs[0]);
2625 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Y, derivs[1]);
2626 fetch_assign_deriv_channel(mach, inst, 3, TGSI_CHAN_Z, derivs[2]);
2627
2628 fetch_texel(mach->Sampler, resource_unit, sampler_unit,
2629 &r[0], &r[1], &r[2], &r[3], &ZeroVec,
2630 derivs, offsets, TGSI_SAMPLER_DERIVS_EXPLICIT,
2631 &r[0], &r[1], &r[2], &r[3]);
2632 break;
2633
2634 default:
2635 assert(0);
2636 }
2637
2638 swizzles[0] = inst->Src[1].Register.SwizzleX;
2639 swizzles[1] = inst->Src[1].Register.SwizzleY;
2640 swizzles[2] = inst->Src[1].Register.SwizzleZ;
2641 swizzles[3] = inst->Src[1].Register.SwizzleW;
2642
2643 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2644 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2645 store_dest(mach, &r[swizzles[chan]],
2646 &inst->Dst[0], inst, chan);
2647 }
2648 }
2649 }
2650
2651
2652 /**
2653 * Evaluate a constant-valued coefficient at the position of the
2654 * current quad.
2655 */
2656 static void
eval_constant_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2657 eval_constant_coef(
2658 struct tgsi_exec_machine *mach,
2659 unsigned attrib,
2660 unsigned chan )
2661 {
2662 unsigned i;
2663
2664 for( i = 0; i < TGSI_QUAD_SIZE; i++ ) {
2665 mach->Inputs[attrib].xyzw[chan].f[i] = mach->InterpCoefs[attrib].a0[chan];
2666 }
2667 }
2668
2669 static void
interp_constant_offset(UNUSED const struct tgsi_exec_machine * mach,UNUSED unsigned attrib,UNUSED unsigned chan,UNUSED float ofs_x,UNUSED float ofs_y,UNUSED union tgsi_exec_channel * out_chan)2670 interp_constant_offset(
2671 UNUSED const struct tgsi_exec_machine *mach,
2672 UNUSED unsigned attrib,
2673 UNUSED unsigned chan,
2674 UNUSED float ofs_x,
2675 UNUSED float ofs_y,
2676 UNUSED union tgsi_exec_channel *out_chan)
2677 {
2678 }
2679
2680 /**
2681 * Evaluate a linear-valued coefficient at the position of the
2682 * current quad.
2683 */
2684 static void
interp_linear_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2685 interp_linear_offset(
2686 const struct tgsi_exec_machine *mach,
2687 unsigned attrib,
2688 unsigned chan,
2689 float ofs_x,
2690 float ofs_y,
2691 union tgsi_exec_channel *out_chan)
2692 {
2693 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2694 const float dady = mach->InterpCoefs[attrib].dady[chan];
2695 const float delta = ofs_x * dadx + ofs_y * dady;
2696 out_chan->f[0] += delta;
2697 out_chan->f[1] += delta;
2698 out_chan->f[2] += delta;
2699 out_chan->f[3] += delta;
2700 }
2701
2702 static void
eval_linear_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2703 eval_linear_coef(struct tgsi_exec_machine *mach,
2704 unsigned attrib,
2705 unsigned chan)
2706 {
2707 const float x = mach->QuadPos.xyzw[0].f[0];
2708 const float y = mach->QuadPos.xyzw[1].f[0];
2709 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2710 const float dady = mach->InterpCoefs[attrib].dady[chan];
2711 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2712
2713 mach->Inputs[attrib].xyzw[chan].f[0] = a0;
2714 mach->Inputs[attrib].xyzw[chan].f[1] = a0 + dadx;
2715 mach->Inputs[attrib].xyzw[chan].f[2] = a0 + dady;
2716 mach->Inputs[attrib].xyzw[chan].f[3] = a0 + dadx + dady;
2717 }
2718
2719 /**
2720 * Evaluate a perspective-valued coefficient at the position of the
2721 * current quad.
2722 */
2723
2724 static void
interp_perspective_offset(const struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan,float ofs_x,float ofs_y,union tgsi_exec_channel * out_chan)2725 interp_perspective_offset(
2726 const struct tgsi_exec_machine *mach,
2727 unsigned attrib,
2728 unsigned chan,
2729 float ofs_x,
2730 float ofs_y,
2731 union tgsi_exec_channel *out_chan)
2732 {
2733 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2734 const float dady = mach->InterpCoefs[attrib].dady[chan];
2735 const float *w = mach->QuadPos.xyzw[3].f;
2736 const float delta = ofs_x * dadx + ofs_y * dady;
2737 out_chan->f[0] += delta / w[0];
2738 out_chan->f[1] += delta / w[1];
2739 out_chan->f[2] += delta / w[2];
2740 out_chan->f[3] += delta / w[3];
2741 }
2742
2743 static void
eval_perspective_coef(struct tgsi_exec_machine * mach,unsigned attrib,unsigned chan)2744 eval_perspective_coef(
2745 struct tgsi_exec_machine *mach,
2746 unsigned attrib,
2747 unsigned chan )
2748 {
2749 const float x = mach->QuadPos.xyzw[0].f[0];
2750 const float y = mach->QuadPos.xyzw[1].f[0];
2751 const float dadx = mach->InterpCoefs[attrib].dadx[chan];
2752 const float dady = mach->InterpCoefs[attrib].dady[chan];
2753 const float a0 = mach->InterpCoefs[attrib].a0[chan] + dadx * x + dady * y;
2754 const float *w = mach->QuadPos.xyzw[3].f;
2755 /* divide by W here */
2756 mach->Inputs[attrib].xyzw[chan].f[0] = a0 / w[0];
2757 mach->Inputs[attrib].xyzw[chan].f[1] = (a0 + dadx) / w[1];
2758 mach->Inputs[attrib].xyzw[chan].f[2] = (a0 + dady) / w[2];
2759 mach->Inputs[attrib].xyzw[chan].f[3] = (a0 + dadx + dady) / w[3];
2760 }
2761
2762
/**
 * Signature of the functions that evaluate one channel of one input
 * attribute for the current quad (see exec_declaration()).
 */
typedef void (*eval_coef_func)(struct tgsi_exec_machine *mach,
                               unsigned attrib,
                               unsigned chan);
2767
2768 static void
exec_declaration(struct tgsi_exec_machine * mach,const struct tgsi_full_declaration * decl)2769 exec_declaration(struct tgsi_exec_machine *mach,
2770 const struct tgsi_full_declaration *decl)
2771 {
2772 if (decl->Declaration.File == TGSI_FILE_SAMPLER_VIEW) {
2773 mach->SamplerViews[decl->Range.First] = decl->SamplerView;
2774 return;
2775 }
2776
2777 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
2778 if (decl->Declaration.File == TGSI_FILE_INPUT) {
2779 unsigned first, last, mask;
2780
2781 first = decl->Range.First;
2782 last = decl->Range.Last;
2783 mask = decl->Declaration.UsageMask;
2784
2785 /* XXX we could remove this special-case code since
2786 * mach->InterpCoefs[first].a0 should already have the
2787 * front/back-face value. But we should first update the
2788 * ureg code to emit the right UsageMask value (WRITEMASK_X).
2789 * Then, we could remove the tgsi_exec_machine::Face field.
2790 */
2791 /* XXX make FACE a system value */
2792 if (decl->Semantic.Name == TGSI_SEMANTIC_FACE) {
2793 unsigned i;
2794
2795 assert(decl->Semantic.Index == 0);
2796 assert(first == last);
2797
2798 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
2799 mach->Inputs[first].xyzw[0].f[i] = mach->Face;
2800 }
2801 } else {
2802 eval_coef_func eval;
2803 apply_sample_offset_func interp;
2804 unsigned i, j;
2805
2806 switch (decl->Interp.Interpolate) {
2807 case TGSI_INTERPOLATE_CONSTANT:
2808 eval = eval_constant_coef;
2809 interp = interp_constant_offset;
2810 break;
2811
2812 case TGSI_INTERPOLATE_LINEAR:
2813 eval = eval_linear_coef;
2814 interp = interp_linear_offset;
2815 break;
2816
2817 case TGSI_INTERPOLATE_PERSPECTIVE:
2818 eval = eval_perspective_coef;
2819 interp = interp_perspective_offset;
2820 break;
2821
2822 case TGSI_INTERPOLATE_COLOR:
2823 eval = mach->flatshade_color ? eval_constant_coef : eval_perspective_coef;
2824 interp = mach->flatshade_color ? interp_constant_offset : interp_perspective_offset;
2825 break;
2826
2827 default:
2828 assert(0);
2829 return;
2830 }
2831
2832 for (i = first; i <= last; i++)
2833 mach->InputSampleOffsetApply[i] = interp;
2834
2835 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2836 if (mask & (1 << j)) {
2837 for (i = first; i <= last; i++) {
2838 eval(mach, i, j);
2839 }
2840 }
2841 }
2842 }
2843
2844 if (DEBUG_EXECUTION) {
2845 unsigned i, j;
2846 for (i = first; i <= last; ++i) {
2847 debug_printf("IN[%2u] = ", i);
2848 for (j = 0; j < TGSI_NUM_CHANNELS; j++) {
2849 if (j > 0) {
2850 debug_printf(" ");
2851 }
2852 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
2853 mach->Inputs[i].xyzw[0].f[j], mach->Inputs[i].xyzw[0].u[j],
2854 mach->Inputs[i].xyzw[1].f[j], mach->Inputs[i].xyzw[1].u[j],
2855 mach->Inputs[i].xyzw[2].f[j], mach->Inputs[i].xyzw[2].u[j],
2856 mach->Inputs[i].xyzw[3].f[j], mach->Inputs[i].xyzw[3].u[j]);
2857 }
2858 }
2859 }
2860 }
2861 }
2862
2863 }
2864
/** Per-channel operation with one source operand. */
typedef void (*micro_unary_op)(union tgsi_exec_channel *dst,
                               const union tgsi_exec_channel *src);
2867
2868 static void
exec_scalar_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype src_datatype)2869 exec_scalar_unary(struct tgsi_exec_machine *mach,
2870 const struct tgsi_full_instruction *inst,
2871 micro_unary_op op,
2872 enum tgsi_exec_datatype src_datatype)
2873 {
2874 unsigned int chan;
2875 union tgsi_exec_channel src;
2876 union tgsi_exec_channel dst;
2877
2878 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
2879 op(&dst, &src);
2880 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2881 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2882 store_dest(mach, &dst, &inst->Dst[0], inst, chan);
2883 }
2884 }
2885 }
2886
2887 static void
exec_vector_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_unary_op op,enum tgsi_exec_datatype src_datatype)2888 exec_vector_unary(struct tgsi_exec_machine *mach,
2889 const struct tgsi_full_instruction *inst,
2890 micro_unary_op op,
2891 enum tgsi_exec_datatype src_datatype)
2892 {
2893 unsigned int chan;
2894 struct tgsi_exec_vector dst;
2895
2896 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2897 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2898 union tgsi_exec_channel src;
2899
2900 fetch_source(mach, &src, &inst->Src[0], chan, src_datatype);
2901 op(&dst.xyzw[chan], &src);
2902 }
2903 }
2904 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2905 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2906 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
2907 }
2908 }
2909 }
2910
/** Per-channel operation with two source operands. */
typedef void (*micro_binary_op)(union tgsi_exec_channel *dst,
                                const union tgsi_exec_channel *src0,
                                const union tgsi_exec_channel *src1);
2914
2915 static void
exec_scalar_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype src_datatype)2916 exec_scalar_binary(struct tgsi_exec_machine *mach,
2917 const struct tgsi_full_instruction *inst,
2918 micro_binary_op op,
2919 enum tgsi_exec_datatype src_datatype)
2920 {
2921 unsigned int chan;
2922 union tgsi_exec_channel src[2];
2923 union tgsi_exec_channel dst;
2924
2925 fetch_source(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, src_datatype);
2926 fetch_source(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, src_datatype);
2927 op(&dst, &src[0], &src[1]);
2928 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2929 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2930 store_dest(mach, &dst, &inst->Dst[0], inst, chan);
2931 }
2932 }
2933 }
2934
2935 static void
exec_vector_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_binary_op op,enum tgsi_exec_datatype src_datatype)2936 exec_vector_binary(struct tgsi_exec_machine *mach,
2937 const struct tgsi_full_instruction *inst,
2938 micro_binary_op op,
2939 enum tgsi_exec_datatype src_datatype)
2940 {
2941 unsigned int chan;
2942 struct tgsi_exec_vector dst;
2943
2944 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2945 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2946 union tgsi_exec_channel src[2];
2947
2948 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2949 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2950 op(&dst.xyzw[chan], &src[0], &src[1]);
2951 }
2952 }
2953 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2954 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2955 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
2956 }
2957 }
2958 }
2959
/** Per-channel operation with three source operands. */
typedef void (*micro_trinary_op)(union tgsi_exec_channel *dst,
                                 const union tgsi_exec_channel *src0,
                                 const union tgsi_exec_channel *src1,
                                 const union tgsi_exec_channel *src2);
2964
2965 static void
exec_vector_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_trinary_op op,enum tgsi_exec_datatype src_datatype)2966 exec_vector_trinary(struct tgsi_exec_machine *mach,
2967 const struct tgsi_full_instruction *inst,
2968 micro_trinary_op op,
2969 enum tgsi_exec_datatype src_datatype)
2970 {
2971 unsigned int chan;
2972 struct tgsi_exec_vector dst;
2973
2974 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2975 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2976 union tgsi_exec_channel src[3];
2977
2978 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
2979 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
2980 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
2981 op(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
2982 }
2983 }
2984 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
2985 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
2986 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
2987 }
2988 }
2989 }
2990
/** Per-channel operation with four source operands. */
typedef void (*micro_quaternary_op)(union tgsi_exec_channel *dst,
                                    const union tgsi_exec_channel *src0,
                                    const union tgsi_exec_channel *src1,
                                    const union tgsi_exec_channel *src2,
                                    const union tgsi_exec_channel *src3);
2996
2997 static void
exec_vector_quaternary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_quaternary_op op,enum tgsi_exec_datatype src_datatype)2998 exec_vector_quaternary(struct tgsi_exec_machine *mach,
2999 const struct tgsi_full_instruction *inst,
3000 micro_quaternary_op op,
3001 enum tgsi_exec_datatype src_datatype)
3002 {
3003 unsigned int chan;
3004 struct tgsi_exec_vector dst;
3005
3006 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3007 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3008 union tgsi_exec_channel src[4];
3009
3010 fetch_source(mach, &src[0], &inst->Src[0], chan, src_datatype);
3011 fetch_source(mach, &src[1], &inst->Src[1], chan, src_datatype);
3012 fetch_source(mach, &src[2], &inst->Src[2], chan, src_datatype);
3013 fetch_source(mach, &src[3], &inst->Src[3], chan, src_datatype);
3014 op(&dst.xyzw[chan], &src[0], &src[1], &src[2], &src[3]);
3015 }
3016 }
3017 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3018 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3019 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3020 }
3021 }
3022 }
3023
3024 static void
exec_dp3(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3025 exec_dp3(struct tgsi_exec_machine *mach,
3026 const struct tgsi_full_instruction *inst)
3027 {
3028 unsigned int chan;
3029 union tgsi_exec_channel arg[3];
3030
3031 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3032 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3033 micro_mul(&arg[2], &arg[0], &arg[1]);
3034
3035 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_Z; chan++) {
3036 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3037 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3038 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3039 }
3040
3041 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3042 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3043 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3044 }
3045 }
3046 }
3047
3048 static void
exec_dp4(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3049 exec_dp4(struct tgsi_exec_machine *mach,
3050 const struct tgsi_full_instruction *inst)
3051 {
3052 unsigned int chan;
3053 union tgsi_exec_channel arg[3];
3054
3055 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3056 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3057 micro_mul(&arg[2], &arg[0], &arg[1]);
3058
3059 for (chan = TGSI_CHAN_Y; chan <= TGSI_CHAN_W; chan++) {
3060 fetch_source(mach, &arg[0], &inst->Src[0], chan, TGSI_EXEC_DATA_FLOAT);
3061 fetch_source(mach, &arg[1], &inst->Src[1], chan, TGSI_EXEC_DATA_FLOAT);
3062 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3063 }
3064
3065 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3066 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3067 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3068 }
3069 }
3070 }
3071
3072 static void
exec_dp2(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3073 exec_dp2(struct tgsi_exec_machine *mach,
3074 const struct tgsi_full_instruction *inst)
3075 {
3076 unsigned int chan;
3077 union tgsi_exec_channel arg[3];
3078
3079 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3080 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3081 micro_mul(&arg[2], &arg[0], &arg[1]);
3082
3083 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3084 fetch_source(mach, &arg[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3085 micro_mad(&arg[2], &arg[0], &arg[1], &arg[2]);
3086
3087 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3088 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3089 store_dest(mach, &arg[2], &inst->Dst[0], inst, chan);
3090 }
3091 }
3092 }
3093
3094 static void
exec_pk2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3095 exec_pk2h(struct tgsi_exec_machine *mach,
3096 const struct tgsi_full_instruction *inst)
3097 {
3098 unsigned chan;
3099 union tgsi_exec_channel arg[2], dst;
3100
3101 fetch_source(mach, &arg[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3102 fetch_source(mach, &arg[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3103 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3104 dst.u[chan] = _mesa_float_to_half(arg[0].f[chan]) |
3105 (_mesa_float_to_half(arg[1].f[chan]) << 16);
3106 }
3107 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3108 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3109 store_dest(mach, &dst, &inst->Dst[0], inst, chan);
3110 }
3111 }
3112 }
3113
3114 static void
exec_up2h(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3115 exec_up2h(struct tgsi_exec_machine *mach,
3116 const struct tgsi_full_instruction *inst)
3117 {
3118 unsigned chan;
3119 union tgsi_exec_channel arg, dst[2];
3120
3121 fetch_source(mach, &arg, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3122 for (chan = 0; chan < TGSI_QUAD_SIZE; chan++) {
3123 dst[0].f[chan] = _mesa_half_to_float(arg.u[chan] & 0xffff);
3124 dst[1].f[chan] = _mesa_half_to_float(arg.u[chan] >> 16);
3125 }
3126 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3127 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3128 store_dest(mach, &dst[chan & 1], &inst->Dst[0], inst, chan);
3129 }
3130 }
3131 }
3132
3133 static void
micro_ucmp(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)3134 micro_ucmp(union tgsi_exec_channel *dst,
3135 const union tgsi_exec_channel *src0,
3136 const union tgsi_exec_channel *src1,
3137 const union tgsi_exec_channel *src2)
3138 {
3139 dst->f[0] = src0->u[0] ? src1->f[0] : src2->f[0];
3140 dst->f[1] = src0->u[1] ? src1->f[1] : src2->f[1];
3141 dst->f[2] = src0->u[2] ? src1->f[2] : src2->f[2];
3142 dst->f[3] = src0->u[3] ? src1->f[3] : src2->f[3];
3143 }
3144
3145 static void
exec_ucmp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3146 exec_ucmp(struct tgsi_exec_machine *mach,
3147 const struct tgsi_full_instruction *inst)
3148 {
3149 unsigned int chan;
3150 struct tgsi_exec_vector dst;
3151
3152 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3153 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3154 union tgsi_exec_channel src[3];
3155
3156 fetch_source(mach, &src[0], &inst->Src[0], chan,
3157 TGSI_EXEC_DATA_UINT);
3158 fetch_source(mach, &src[1], &inst->Src[1], chan,
3159 TGSI_EXEC_DATA_FLOAT);
3160 fetch_source(mach, &src[2], &inst->Src[2], chan,
3161 TGSI_EXEC_DATA_FLOAT);
3162 micro_ucmp(&dst.xyzw[chan], &src[0], &src[1], &src[2]);
3163 }
3164 }
3165 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3166 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3167 store_dest(mach, &dst.xyzw[chan], &inst->Dst[0], inst, chan);
3168 }
3169 }
3170 }
3171
3172 static void
exec_dst(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3173 exec_dst(struct tgsi_exec_machine *mach,
3174 const struct tgsi_full_instruction *inst)
3175 {
3176 union tgsi_exec_channel r[2];
3177 union tgsi_exec_channel d[4];
3178
3179 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3180 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3181 fetch_source(mach, &r[1], &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3182 micro_mul(&d[TGSI_CHAN_Y], &r[0], &r[1]);
3183 }
3184 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3185 fetch_source(mach, &d[TGSI_CHAN_Z], &inst->Src[0], TGSI_CHAN_Z, TGSI_EXEC_DATA_FLOAT);
3186 }
3187 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3188 fetch_source(mach, &d[TGSI_CHAN_W], &inst->Src[1], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3189 }
3190
3191 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3192 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
3193 }
3194 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3195 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
3196 }
3197 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3198 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
3199 }
3200 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3201 store_dest(mach, &d[TGSI_CHAN_W], &inst->Dst[0], inst, TGSI_CHAN_W);
3202 }
3203 }
3204
3205 static void
exec_log(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3206 exec_log(struct tgsi_exec_machine *mach,
3207 const struct tgsi_full_instruction *inst)
3208 {
3209 union tgsi_exec_channel r[3];
3210
3211 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3212 micro_abs(&r[2], &r[0]); /* r2 = abs(r0) */
3213 micro_lg2(&r[1], &r[2]); /* r1 = lg2(r2) */
3214 micro_flr(&r[0], &r[1]); /* r0 = floor(r1) */
3215 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3216 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_X);
3217 }
3218 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3219 micro_exp2(&r[0], &r[0]); /* r0 = 2 ^ r0 */
3220 micro_div(&r[0], &r[2], &r[0]); /* r0 = r2 / r0 */
3221 store_dest(mach, &r[0], &inst->Dst[0], inst, TGSI_CHAN_Y);
3222 }
3223 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3224 store_dest(mach, &r[1], &inst->Dst[0], inst, TGSI_CHAN_Z);
3225 }
3226 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3227 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3228 }
3229 }
3230
3231 static void
exec_exp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3232 exec_exp(struct tgsi_exec_machine *mach,
3233 const struct tgsi_full_instruction *inst)
3234 {
3235 union tgsi_exec_channel r[3];
3236
3237 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3238 micro_flr(&r[1], &r[0]); /* r1 = floor(r0) */
3239 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3240 micro_exp2(&r[2], &r[1]); /* r2 = 2 ^ r1 */
3241 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_X);
3242 }
3243 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3244 micro_sub(&r[2], &r[0], &r[1]); /* r2 = r0 - r1 */
3245 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Y);
3246 }
3247 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3248 micro_exp2(&r[2], &r[0]); /* r2 = 2 ^ r0 */
3249 store_dest(mach, &r[2], &inst->Dst[0], inst, TGSI_CHAN_Z);
3250 }
3251 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3252 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3253 }
3254 }
3255
3256 static void
exec_lit(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3257 exec_lit(struct tgsi_exec_machine *mach,
3258 const struct tgsi_full_instruction *inst)
3259 {
3260 union tgsi_exec_channel r[3];
3261 union tgsi_exec_channel d[3];
3262
3263 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_YZ) {
3264 fetch_source(mach, &r[0], &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
3265 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Z) {
3266 fetch_source(mach, &r[1], &inst->Src[0], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
3267 micro_max(&r[1], &r[1], &ZeroVec);
3268
3269 fetch_source(mach, &r[2], &inst->Src[0], TGSI_CHAN_W, TGSI_EXEC_DATA_FLOAT);
3270 micro_min(&r[2], &r[2], &P128Vec);
3271 micro_max(&r[2], &r[2], &M128Vec);
3272 micro_pow(&r[1], &r[1], &r[2]);
3273 micro_lt(&d[TGSI_CHAN_Z], &ZeroVec, &r[0], &r[1], &ZeroVec);
3274 store_dest(mach, &d[TGSI_CHAN_Z], &inst->Dst[0], inst, TGSI_CHAN_Z);
3275 }
3276 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_Y) {
3277 micro_max(&d[TGSI_CHAN_Y], &r[0], &ZeroVec);
3278 store_dest(mach, &d[TGSI_CHAN_Y], &inst->Dst[0], inst, TGSI_CHAN_Y);
3279 }
3280 }
3281 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
3282 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_X);
3283 }
3284
3285 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_W) {
3286 store_dest(mach, &OneVec, &inst->Dst[0], inst, TGSI_CHAN_W);
3287 }
3288 }
3289
3290 static void
exec_break(struct tgsi_exec_machine * mach)3291 exec_break(struct tgsi_exec_machine *mach)
3292 {
3293 if (mach->BreakType == TGSI_EXEC_BREAK_INSIDE_LOOP) {
3294 /* turn off loop channels for each enabled exec channel */
3295 mach->LoopMask &= ~mach->ExecMask;
3296 /* Todo: if mach->LoopMask == 0, jump to end of loop */
3297 UPDATE_EXEC_MASK(mach);
3298 } else {
3299 assert(mach->BreakType == TGSI_EXEC_BREAK_INSIDE_SWITCH);
3300
3301 mach->Switch.mask = 0x0;
3302
3303 UPDATE_EXEC_MASK(mach);
3304 }
3305 }
3306
3307 static void
exec_switch(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3308 exec_switch(struct tgsi_exec_machine *mach,
3309 const struct tgsi_full_instruction *inst)
3310 {
3311 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
3312 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
3313
3314 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
3315 fetch_source(mach, &mach->Switch.selector, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3316 mach->Switch.mask = 0x0;
3317 mach->Switch.defaultMask = 0x0;
3318
3319 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
3320 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_SWITCH;
3321
3322 UPDATE_EXEC_MASK(mach);
3323 }
3324
3325 static void
exec_case(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3326 exec_case(struct tgsi_exec_machine *mach,
3327 const struct tgsi_full_instruction *inst)
3328 {
3329 unsigned prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3330 union tgsi_exec_channel src;
3331 unsigned mask = 0;
3332
3333 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_EXEC_DATA_UINT);
3334
3335 if (mach->Switch.selector.u[0] == src.u[0]) {
3336 mask |= 0x1;
3337 }
3338 if (mach->Switch.selector.u[1] == src.u[1]) {
3339 mask |= 0x2;
3340 }
3341 if (mach->Switch.selector.u[2] == src.u[2]) {
3342 mask |= 0x4;
3343 }
3344 if (mach->Switch.selector.u[3] == src.u[3]) {
3345 mask |= 0x8;
3346 }
3347
3348 mach->Switch.defaultMask |= mask;
3349
3350 mach->Switch.mask |= mask & prevMask;
3351
3352 UPDATE_EXEC_MASK(mach);
3353 }
3354
3355 /* FIXME: this will only work if default is last */
3356 static void
exec_default(struct tgsi_exec_machine * mach)3357 exec_default(struct tgsi_exec_machine *mach)
3358 {
3359 unsigned prevMask = mach->SwitchStack[mach->SwitchStackTop - 1].mask;
3360
3361 mach->Switch.mask |= ~mach->Switch.defaultMask & prevMask;
3362
3363 UPDATE_EXEC_MASK(mach);
3364 }
3365
3366 static void
exec_endswitch(struct tgsi_exec_machine * mach)3367 exec_endswitch(struct tgsi_exec_machine *mach)
3368 {
3369 mach->Switch = mach->SwitchStack[--mach->SwitchStackTop];
3370 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
3371
3372 UPDATE_EXEC_MASK(mach);
3373 }
3374
/**
 * Operation on 64-bit channels.  \p src points at the first source operand;
 * the binary and trinary executors pass an array of two or three operands.
 */
typedef void (*micro_dop)(union tgsi_double_channel *dst,
                          const union tgsi_double_channel *src);

/** Operation with a 64-bit first source and a 32-bit second source. */
typedef void (*micro_dop_sop)(union tgsi_double_channel *dst,
                              const union tgsi_double_channel *src0,
                              union tgsi_exec_channel *src1);

/** Operation producing a 64-bit result from a 32-bit source. */
typedef void (*micro_dop_s)(union tgsi_double_channel *dst,
                            const union tgsi_exec_channel *src);

/** Operation producing a 32-bit result from a 64-bit source. */
typedef void (*micro_sop_d)(union tgsi_exec_channel *dst,
                            const union tgsi_double_channel *src);
3387
3388 static void
fetch_double_channel(struct tgsi_exec_machine * mach,union tgsi_double_channel * chan,const struct tgsi_full_src_register * reg,unsigned chan_0,unsigned chan_1)3389 fetch_double_channel(struct tgsi_exec_machine *mach,
3390 union tgsi_double_channel *chan,
3391 const struct tgsi_full_src_register *reg,
3392 unsigned chan_0,
3393 unsigned chan_1)
3394 {
3395 union tgsi_exec_channel src[2];
3396 unsigned i;
3397
3398 fetch_source_d(mach, &src[0], reg, chan_0);
3399 fetch_source_d(mach, &src[1], reg, chan_1);
3400
3401 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3402 chan->u[i][0] = src[0].u[i];
3403 chan->u[i][1] = src[1].u[i];
3404 }
3405 assert(!reg->Register.Absolute);
3406 assert(!reg->Register.Negate);
3407 }
3408
3409 static void
store_double_channel(struct tgsi_exec_machine * mach,const union tgsi_double_channel * chan,const struct tgsi_full_dst_register * reg,const struct tgsi_full_instruction * inst,unsigned chan_0,unsigned chan_1)3410 store_double_channel(struct tgsi_exec_machine *mach,
3411 const union tgsi_double_channel *chan,
3412 const struct tgsi_full_dst_register *reg,
3413 const struct tgsi_full_instruction *inst,
3414 unsigned chan_0,
3415 unsigned chan_1)
3416 {
3417 union tgsi_exec_channel dst[2];
3418 unsigned i;
3419 union tgsi_double_channel temp;
3420 const unsigned execmask = mach->ExecMask;
3421
3422 if (!inst->Instruction.Saturate) {
3423 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3424 if (execmask & (1 << i)) {
3425 dst[0].u[i] = chan->u[i][0];
3426 dst[1].u[i] = chan->u[i][1];
3427 }
3428 }
3429 else {
3430 for (i = 0; i < TGSI_QUAD_SIZE; i++)
3431 if (execmask & (1 << i)) {
3432 if (chan->d[i] < 0.0 || isnan(chan->d[i]))
3433 temp.d[i] = 0.0;
3434 else if (chan->d[i] > 1.0)
3435 temp.d[i] = 1.0;
3436 else
3437 temp.d[i] = chan->d[i];
3438
3439 dst[0].u[i] = temp.u[i][0];
3440 dst[1].u[i] = temp.u[i][1];
3441 }
3442 }
3443
3444 store_dest_double(mach, &dst[0], reg, chan_0);
3445 if (chan_1 != (unsigned)-1)
3446 store_dest_double(mach, &dst[1], reg, chan_1);
3447 }
3448
3449 static void
exec_double_unary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3450 exec_double_unary(struct tgsi_exec_machine *mach,
3451 const struct tgsi_full_instruction *inst,
3452 micro_dop op)
3453 {
3454 union tgsi_double_channel src;
3455 union tgsi_double_channel dst;
3456
3457 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3458 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3459 op(&dst, &src);
3460 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3461 }
3462 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3463 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3464 op(&dst, &src);
3465 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3466 }
3467 }
3468
3469 static void
exec_double_binary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op,enum tgsi_exec_datatype dst_datatype)3470 exec_double_binary(struct tgsi_exec_machine *mach,
3471 const struct tgsi_full_instruction *inst,
3472 micro_dop op,
3473 enum tgsi_exec_datatype dst_datatype)
3474 {
3475 union tgsi_double_channel src[2];
3476 union tgsi_double_channel dst;
3477 int first_dest_chan, second_dest_chan;
3478 int wmask;
3479
3480 wmask = inst->Dst[0].Register.WriteMask;
3481 /* these are & because of the way DSLT etc store their destinations */
3482 if (wmask & TGSI_WRITEMASK_XY) {
3483 first_dest_chan = TGSI_CHAN_X;
3484 second_dest_chan = TGSI_CHAN_Y;
3485 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3486 first_dest_chan = (wmask & TGSI_WRITEMASK_X) ? TGSI_CHAN_X : TGSI_CHAN_Y;
3487 second_dest_chan = -1;
3488 }
3489
3490 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3491 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3492 op(&dst, src);
3493 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3494 }
3495
3496 if (wmask & TGSI_WRITEMASK_ZW) {
3497 first_dest_chan = TGSI_CHAN_Z;
3498 second_dest_chan = TGSI_CHAN_W;
3499 if (dst_datatype == TGSI_EXEC_DATA_UINT) {
3500 first_dest_chan = (wmask & TGSI_WRITEMASK_Z) ? TGSI_CHAN_Z : TGSI_CHAN_W;
3501 second_dest_chan = -1;
3502 }
3503
3504 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3505 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3506 op(&dst, src);
3507 store_double_channel(mach, &dst, &inst->Dst[0], inst, first_dest_chan, second_dest_chan);
3508 }
3509 }
3510
3511 static void
exec_double_trinary(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop op)3512 exec_double_trinary(struct tgsi_exec_machine *mach,
3513 const struct tgsi_full_instruction *inst,
3514 micro_dop op)
3515 {
3516 union tgsi_double_channel src[3];
3517 union tgsi_double_channel dst;
3518
3519 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
3520 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3521 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_X, TGSI_CHAN_Y);
3522 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_X, TGSI_CHAN_Y);
3523 op(&dst, src);
3524 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3525 }
3526 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
3527 fetch_double_channel(mach, &src[0], &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3528 fetch_double_channel(mach, &src[1], &inst->Src[1], TGSI_CHAN_Z, TGSI_CHAN_W);
3529 fetch_double_channel(mach, &src[2], &inst->Src[2], TGSI_CHAN_Z, TGSI_CHAN_W);
3530 op(&dst, src);
3531 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3532 }
3533 }
3534
3535 static void
exec_dldexp(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3536 exec_dldexp(struct tgsi_exec_machine *mach,
3537 const struct tgsi_full_instruction *inst)
3538 {
3539 union tgsi_double_channel src0;
3540 union tgsi_exec_channel src1;
3541 union tgsi_double_channel dst;
3542 int wmask;
3543
3544 wmask = inst->Dst[0].Register.WriteMask;
3545 if (wmask & TGSI_WRITEMASK_XY) {
3546 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3547 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3548 micro_dldexp(&dst, &src0, &src1);
3549 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3550 }
3551
3552 if (wmask & TGSI_WRITEMASK_ZW) {
3553 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3554 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3555 micro_dldexp(&dst, &src0, &src1);
3556 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3557 }
3558 }
3559
3560 static void
exec_arg0_64_arg1_32(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_sop op)3561 exec_arg0_64_arg1_32(struct tgsi_exec_machine *mach,
3562 const struct tgsi_full_instruction *inst,
3563 micro_dop_sop op)
3564 {
3565 union tgsi_double_channel src0;
3566 union tgsi_exec_channel src1;
3567 union tgsi_double_channel dst;
3568 int wmask;
3569
3570 wmask = inst->Dst[0].Register.WriteMask;
3571 if (wmask & TGSI_WRITEMASK_XY) {
3572 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
3573 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_INT);
3574 op(&dst, &src0, &src1);
3575 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
3576 }
3577
3578 if (wmask & TGSI_WRITEMASK_ZW) {
3579 fetch_double_channel(mach, &src0, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
3580 fetch_source(mach, &src1, &inst->Src[1], TGSI_CHAN_Z, TGSI_EXEC_DATA_INT);
3581 op(&dst, &src0, &src1);
3582 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
3583 }
3584 }
3585
3586 static int
get_image_coord_dim(unsigned tgsi_tex)3587 get_image_coord_dim(unsigned tgsi_tex)
3588 {
3589 int dim;
3590 switch (tgsi_tex) {
3591 case TGSI_TEXTURE_BUFFER:
3592 case TGSI_TEXTURE_1D:
3593 dim = 1;
3594 break;
3595 case TGSI_TEXTURE_2D:
3596 case TGSI_TEXTURE_RECT:
3597 case TGSI_TEXTURE_1D_ARRAY:
3598 case TGSI_TEXTURE_2D_MSAA:
3599 dim = 2;
3600 break;
3601 case TGSI_TEXTURE_3D:
3602 case TGSI_TEXTURE_CUBE:
3603 case TGSI_TEXTURE_2D_ARRAY:
3604 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3605 case TGSI_TEXTURE_CUBE_ARRAY:
3606 dim = 3;
3607 break;
3608 default:
3609 assert(!"unknown texture target");
3610 dim = 0;
3611 break;
3612 }
3613
3614 return dim;
3615 }
3616
3617 static int
get_image_coord_sample(unsigned tgsi_tex)3618 get_image_coord_sample(unsigned tgsi_tex)
3619 {
3620 int sample = 0;
3621 switch (tgsi_tex) {
3622 case TGSI_TEXTURE_2D_MSAA:
3623 sample = 3;
3624 break;
3625 case TGSI_TEXTURE_2D_ARRAY_MSAA:
3626 sample = 4;
3627 break;
3628 default:
3629 break;
3630 }
3631 return sample;
3632 }
3633
3634 static void
exec_load_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3635 exec_load_img(struct tgsi_exec_machine *mach,
3636 const struct tgsi_full_instruction *inst)
3637 {
3638 union tgsi_exec_channel r[4], sample_r;
3639 unsigned unit;
3640 int sample;
3641 int i, j;
3642 int dim;
3643 unsigned chan;
3644 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3645 struct tgsi_image_params params;
3646
3647 unit = fetch_sampler_unit(mach, inst, 0);
3648 dim = get_image_coord_dim(inst->Memory.Texture);
3649 sample = get_image_coord_sample(inst->Memory.Texture);
3650 assert(dim <= 3);
3651
3652 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3653 params.unit = unit;
3654 params.tgsi_tex_instr = inst->Memory.Texture;
3655 params.format = inst->Memory.Format;
3656
3657 for (i = 0; i < dim; i++) {
3658 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3659 }
3660
3661 if (sample)
3662 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3663
3664 mach->Image->load(mach->Image, ¶ms,
3665 r[0].i, r[1].i, r[2].i, sample_r.i,
3666 rgba);
3667 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3668 r[0].f[j] = rgba[0][j];
3669 r[1].f[j] = rgba[1][j];
3670 r[2].f[j] = rgba[2][j];
3671 r[3].f[j] = rgba[3][j];
3672 }
3673 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3674 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3675 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
3676 }
3677 }
3678 }
3679
3680 static void
exec_load_membuf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3681 exec_load_membuf(struct tgsi_exec_machine *mach,
3682 const struct tgsi_full_instruction *inst)
3683 {
3684 uint32_t unit = fetch_sampler_unit(mach, inst, 0);
3685
3686 uint32_t size;
3687 const char *ptr;
3688 switch (inst->Src[0].Register.File) {
3689 case TGSI_FILE_MEMORY:
3690 ptr = mach->LocalMem;
3691 size = mach->LocalMemSize;
3692 break;
3693
3694 case TGSI_FILE_BUFFER:
3695 ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
3696 break;
3697
3698 case TGSI_FILE_CONSTANT:
3699 if (unit < ARRAY_SIZE(mach->Consts)) {
3700 ptr = mach->Consts[unit];
3701 size = mach->ConstsSize[unit];
3702 } else {
3703 ptr = NULL;
3704 size = 0;
3705 }
3706 break;
3707
3708 default:
3709 unreachable("unsupported TGSI_OPCODE_LOAD file");
3710 }
3711
3712 union tgsi_exec_channel offset;
3713 IFETCH(&offset, 1, TGSI_CHAN_X);
3714
3715 assert(inst->Dst[0].Register.WriteMask);
3716 uint32_t load_size = util_last_bit(inst->Dst[0].Register.WriteMask) * 4;
3717
3718 union tgsi_exec_channel rgba[TGSI_NUM_CHANNELS];
3719 memset(&rgba, 0, sizeof(rgba));
3720 for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
3721 if (size >= load_size && offset.u[j] <= (size - load_size)) {
3722 for (int chan = 0; chan < load_size / 4; chan++)
3723 rgba[chan].u[j] = *(uint32_t *)(ptr + offset.u[j] + chan * 4);
3724 }
3725 }
3726
3727 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3728 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3729 store_dest(mach, &rgba[chan], &inst->Dst[0], inst, chan);
3730 }
3731 }
3732 }
3733
3734 static void
exec_load(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3735 exec_load(struct tgsi_exec_machine *mach,
3736 const struct tgsi_full_instruction *inst)
3737 {
3738 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
3739 exec_load_img(mach, inst);
3740 else
3741 exec_load_membuf(mach, inst);
3742 }
3743
3744 static unsigned
fetch_store_img_unit(struct tgsi_exec_machine * mach,const struct tgsi_full_dst_register * dst)3745 fetch_store_img_unit(struct tgsi_exec_machine *mach,
3746 const struct tgsi_full_dst_register *dst)
3747 {
3748 unsigned unit = 0;
3749 int i;
3750 if (dst->Register.Indirect) {
3751 union tgsi_exec_channel indir_index, index2;
3752 const unsigned execmask = mach->ExecMask;
3753 index2.i[0] =
3754 index2.i[1] =
3755 index2.i[2] =
3756 index2.i[3] = dst->Indirect.Index;
3757
3758 fetch_src_file_channel(mach,
3759 dst->Indirect.File,
3760 dst->Indirect.Swizzle,
3761 &index2,
3762 &ZeroVec,
3763 &indir_index);
3764 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3765 if (execmask & (1 << i)) {
3766 unit = dst->Register.Index + indir_index.i[i];
3767 break;
3768 }
3769 }
3770 } else {
3771 unit = dst->Register.Index;
3772 }
3773 return unit;
3774 }
3775
3776 static void
exec_store_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3777 exec_store_img(struct tgsi_exec_machine *mach,
3778 const struct tgsi_full_instruction *inst)
3779 {
3780 union tgsi_exec_channel r[3], sample_r;
3781 union tgsi_exec_channel value[4];
3782 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3783 struct tgsi_image_params params;
3784 int dim;
3785 int sample;
3786 int i, j;
3787 unsigned unit;
3788 unit = fetch_store_img_unit(mach, &inst->Dst[0]);
3789 dim = get_image_coord_dim(inst->Memory.Texture);
3790 sample = get_image_coord_sample(inst->Memory.Texture);
3791 assert(dim <= 3);
3792
3793 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3794 params.unit = unit;
3795 params.tgsi_tex_instr = inst->Memory.Texture;
3796 params.format = inst->Memory.Format;
3797
3798 for (i = 0; i < dim; i++) {
3799 IFETCH(&r[i], 0, TGSI_CHAN_X + i);
3800 }
3801
3802 for (i = 0; i < 4; i++) {
3803 FETCH(&value[i], 1, TGSI_CHAN_X + i);
3804 }
3805 if (sample)
3806 IFETCH(&sample_r, 0, TGSI_CHAN_X + sample);
3807
3808 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3809 rgba[0][j] = value[0].f[j];
3810 rgba[1][j] = value[1].f[j];
3811 rgba[2][j] = value[2].f[j];
3812 rgba[3][j] = value[3].f[j];
3813 }
3814
3815 mach->Image->store(mach->Image, ¶ms,
3816 r[0].i, r[1].i, r[2].i, sample_r.i,
3817 rgba);
3818 }
3819
3820
3821 static void
exec_store_membuf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3822 exec_store_membuf(struct tgsi_exec_machine *mach,
3823 const struct tgsi_full_instruction *inst)
3824 {
3825 uint32_t unit = fetch_store_img_unit(mach, &inst->Dst[0]);
3826 uint32_t size;
3827
3828 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3829
3830 const char *ptr;
3831 switch (inst->Dst[0].Register.File) {
3832 case TGSI_FILE_MEMORY:
3833 ptr = mach->LocalMem;
3834 size = mach->LocalMemSize;
3835 break;
3836
3837 case TGSI_FILE_BUFFER:
3838 ptr = mach->Buffer->lookup(mach->Buffer, unit, &size);
3839 break;
3840
3841 default:
3842 unreachable("unsupported TGSI_OPCODE_STORE file");
3843 }
3844
3845 union tgsi_exec_channel offset;
3846 IFETCH(&offset, 0, TGSI_CHAN_X);
3847
3848 union tgsi_exec_channel value[4];
3849 for (int i = 0; i < 4; i++)
3850 FETCH(&value[i], 1, TGSI_CHAN_X + i);
3851
3852 for (int j = 0; j < TGSI_QUAD_SIZE; j++) {
3853 if (!(execmask & (1 << j)))
3854 continue;
3855 if (size < offset.u[j])
3856 continue;
3857
3858 uint32_t *invocation_ptr = (uint32_t *)(ptr + offset.u[j]);
3859 uint32_t size_avail = size - offset.u[j];
3860
3861 for (int chan = 0; chan < MIN2(4, size_avail / 4); chan++) {
3862 if (inst->Dst[0].Register.WriteMask & (1 << chan))
3863 memcpy(&invocation_ptr[chan], &value[chan].u[j], 4);
3864 }
3865 }
3866 }
3867
3868 static void
exec_store(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3869 exec_store(struct tgsi_exec_machine *mach,
3870 const struct tgsi_full_instruction *inst)
3871 {
3872 if (inst->Dst[0].Register.File == TGSI_FILE_IMAGE)
3873 exec_store_img(mach, inst);
3874 else
3875 exec_store_membuf(mach, inst);
3876 }
3877
3878 static void
exec_atomop_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3879 exec_atomop_img(struct tgsi_exec_machine *mach,
3880 const struct tgsi_full_instruction *inst)
3881 {
3882 union tgsi_exec_channel r[4], sample_r;
3883 union tgsi_exec_channel value[4], value2[4];
3884 float rgba[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3885 float rgba2[TGSI_NUM_CHANNELS][TGSI_QUAD_SIZE];
3886 struct tgsi_image_params params;
3887 int dim;
3888 int sample;
3889 int i, j;
3890 unsigned unit, chan;
3891 unit = fetch_sampler_unit(mach, inst, 0);
3892 dim = get_image_coord_dim(inst->Memory.Texture);
3893 sample = get_image_coord_sample(inst->Memory.Texture);
3894 assert(dim <= 3);
3895
3896 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3897 params.unit = unit;
3898 params.tgsi_tex_instr = inst->Memory.Texture;
3899 params.format = inst->Memory.Format;
3900
3901 for (i = 0; i < dim; i++) {
3902 IFETCH(&r[i], 1, TGSI_CHAN_X + i);
3903 }
3904
3905 for (i = 0; i < 4; i++) {
3906 FETCH(&value[i], 2, TGSI_CHAN_X + i);
3907 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
3908 FETCH(&value2[i], 3, TGSI_CHAN_X + i);
3909 }
3910 if (sample)
3911 IFETCH(&sample_r, 1, TGSI_CHAN_X + sample);
3912
3913 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3914 rgba[0][j] = value[0].f[j];
3915 rgba[1][j] = value[1].f[j];
3916 rgba[2][j] = value[2].f[j];
3917 rgba[3][j] = value[3].f[j];
3918 }
3919 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS) {
3920 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3921 rgba2[0][j] = value2[0].f[j];
3922 rgba2[1][j] = value2[1].f[j];
3923 rgba2[2][j] = value2[2].f[j];
3924 rgba2[3][j] = value2[3].f[j];
3925 }
3926 }
3927
3928 mach->Image->op(mach->Image, ¶ms, inst->Instruction.Opcode,
3929 r[0].i, r[1].i, r[2].i, sample_r.i,
3930 rgba, rgba2);
3931
3932 for (j = 0; j < TGSI_QUAD_SIZE; j++) {
3933 r[0].f[j] = rgba[0][j];
3934 r[1].f[j] = rgba[1][j];
3935 r[2].f[j] = rgba[2][j];
3936 r[3].f[j] = rgba[3][j];
3937 }
3938 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
3939 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
3940 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
3941 }
3942 }
3943 }
3944
3945 static void
exec_atomop_membuf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)3946 exec_atomop_membuf(struct tgsi_exec_machine *mach,
3947 const struct tgsi_full_instruction *inst)
3948 {
3949 union tgsi_exec_channel offset, r0, r1;
3950 unsigned chan, i;
3951 int execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
3952 IFETCH(&offset, 1, TGSI_CHAN_X);
3953
3954 if (!(inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X))
3955 return;
3956
3957 void *ptr[TGSI_QUAD_SIZE];
3958 if (inst->Src[0].Register.File == TGSI_FILE_BUFFER) {
3959 uint32_t unit = fetch_sampler_unit(mach, inst, 0);
3960 uint32_t size;
3961 char *buffer = mach->Buffer->lookup(mach->Buffer, unit, &size);
3962 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
3963 if (likely(size >= 4 && offset.u[i] <= size - 4))
3964 ptr[i] = buffer + offset.u[i];
3965 else
3966 ptr[i] = NULL;
3967 }
3968 } else {
3969 assert(inst->Src[0].Register.File == TGSI_FILE_MEMORY);
3970
3971 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
3972 if (likely(mach->LocalMemSize >= 4 && offset.u[i] <= mach->LocalMemSize - 4))
3973 ptr[i] = (char *)mach->LocalMem + offset.u[i];
3974 else
3975 ptr[i] = NULL;
3976 }
3977 }
3978
3979 FETCH(&r0, 2, TGSI_CHAN_X);
3980 if (inst->Instruction.Opcode == TGSI_OPCODE_ATOMCAS)
3981 FETCH(&r1, 3, TGSI_CHAN_X);
3982
3983 /* The load/op/store sequence has to happen inside the loop since ptr
3984 * may have the same ptr in some of the invocations.
3985 */
3986 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
3987 if (!(execmask & (1 << i)))
3988 continue;
3989
3990 uint32_t val = 0;
3991 if (ptr[i]) {
3992 memcpy(&val, ptr[i], sizeof(val));
3993
3994 uint32_t result;
3995 switch (inst->Instruction.Opcode) {
3996 case TGSI_OPCODE_ATOMUADD:
3997 result = val + r0.u[i];
3998 break;
3999 case TGSI_OPCODE_ATOMXOR:
4000 result = val ^ r0.u[i];
4001 break;
4002 case TGSI_OPCODE_ATOMOR:
4003 result = val | r0.u[i];
4004 break;
4005 case TGSI_OPCODE_ATOMAND:
4006 result = val & r0.u[i];
4007 break;
4008 case TGSI_OPCODE_ATOMUMIN:
4009 result = MIN2(val, r0.u[i]);
4010 break;
4011 case TGSI_OPCODE_ATOMUMAX:
4012 result = MAX2(val, r0.u[i]);
4013 break;
4014 case TGSI_OPCODE_ATOMIMIN:
4015 result = MIN2((int32_t)val, r0.i[i]);
4016 break;
4017 case TGSI_OPCODE_ATOMIMAX:
4018 result = MAX2((int32_t)val, r0.i[i]);
4019 break;
4020 case TGSI_OPCODE_ATOMXCHG:
4021 result = r0.u[i];
4022 break;
4023 case TGSI_OPCODE_ATOMCAS:
4024 if (val == r0.u[i])
4025 result = r1.u[i];
4026 else
4027 result = val;
4028 break;
4029 case TGSI_OPCODE_ATOMFADD:
4030 result = fui(uif(val) + r0.f[i]);
4031 break;
4032 default:
4033 unreachable("bad atomic op");
4034 }
4035 memcpy(ptr[i], &result, sizeof(result));
4036 }
4037
4038 r0.u[i] = val;
4039 }
4040
4041 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++)
4042 store_dest(mach, &r0, &inst->Dst[0], inst, chan);
4043 }
4044
4045 static void
exec_atomop(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4046 exec_atomop(struct tgsi_exec_machine *mach,
4047 const struct tgsi_full_instruction *inst)
4048 {
4049 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4050 exec_atomop_img(mach, inst);
4051 else
4052 exec_atomop_membuf(mach, inst);
4053 }
4054
4055 static void
exec_resq_img(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4056 exec_resq_img(struct tgsi_exec_machine *mach,
4057 const struct tgsi_full_instruction *inst)
4058 {
4059 int result[4];
4060 union tgsi_exec_channel r[4];
4061 unsigned unit;
4062 int i, chan, j;
4063 struct tgsi_image_params params;
4064
4065 unit = fetch_sampler_unit(mach, inst, 0);
4066
4067 params.execmask = mach->ExecMask & mach->NonHelperMask & ~mach->KillMask;
4068 params.unit = unit;
4069 params.tgsi_tex_instr = inst->Memory.Texture;
4070 params.format = inst->Memory.Format;
4071
4072 mach->Image->get_dims(mach->Image, ¶ms, result);
4073
4074 for (i = 0; i < TGSI_QUAD_SIZE; i++) {
4075 for (j = 0; j < 4; j++) {
4076 r[j].i[i] = result[j];
4077 }
4078 }
4079
4080 for (chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4081 if (inst->Dst[0].Register.WriteMask & (1 << chan)) {
4082 store_dest(mach, &r[chan], &inst->Dst[0], inst, chan);
4083 }
4084 }
4085 }
4086
4087 static void
exec_resq_buf(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4088 exec_resq_buf(struct tgsi_exec_machine *mach,
4089 const struct tgsi_full_instruction *inst)
4090 {
4091 uint32_t unit = fetch_sampler_unit(mach, inst, 0);
4092 uint32_t size;
4093 (void)mach->Buffer->lookup(mach->Buffer, unit, &size);
4094
4095 union tgsi_exec_channel r;
4096 for (int i = 0; i < TGSI_QUAD_SIZE; i++)
4097 r.i[i] = size;
4098
4099 if (inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_X) {
4100 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4101 store_dest(mach, &r, &inst->Dst[0], inst, TGSI_CHAN_X);
4102 }
4103 }
4104 }
4105
4106 static void
exec_resq(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4107 exec_resq(struct tgsi_exec_machine *mach,
4108 const struct tgsi_full_instruction *inst)
4109 {
4110 if (inst->Src[0].Register.File == TGSI_FILE_IMAGE)
4111 exec_resq_img(mach, inst);
4112 else
4113 exec_resq_buf(mach, inst);
4114 }
4115
4116 static void
micro_f2u64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4117 micro_f2u64(union tgsi_double_channel *dst,
4118 const union tgsi_exec_channel *src)
4119 {
4120 dst->u64[0] = (uint64_t)src->f[0];
4121 dst->u64[1] = (uint64_t)src->f[1];
4122 dst->u64[2] = (uint64_t)src->f[2];
4123 dst->u64[3] = (uint64_t)src->f[3];
4124 }
4125
4126 static void
micro_f2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4127 micro_f2i64(union tgsi_double_channel *dst,
4128 const union tgsi_exec_channel *src)
4129 {
4130 dst->i64[0] = (int64_t)src->f[0];
4131 dst->i64[1] = (int64_t)src->f[1];
4132 dst->i64[2] = (int64_t)src->f[2];
4133 dst->i64[3] = (int64_t)src->f[3];
4134 }
4135
4136 static void
micro_u2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4137 micro_u2i64(union tgsi_double_channel *dst,
4138 const union tgsi_exec_channel *src)
4139 {
4140 dst->u64[0] = (uint64_t)src->u[0];
4141 dst->u64[1] = (uint64_t)src->u[1];
4142 dst->u64[2] = (uint64_t)src->u[2];
4143 dst->u64[3] = (uint64_t)src->u[3];
4144 }
4145
4146 static void
micro_i2i64(union tgsi_double_channel * dst,const union tgsi_exec_channel * src)4147 micro_i2i64(union tgsi_double_channel *dst,
4148 const union tgsi_exec_channel *src)
4149 {
4150 dst->i64[0] = (int64_t)src->i[0];
4151 dst->i64[1] = (int64_t)src->i[1];
4152 dst->i64[2] = (int64_t)src->i[2];
4153 dst->i64[3] = (int64_t)src->i[3];
4154 }
4155
4156 static void
micro_d2u64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4157 micro_d2u64(union tgsi_double_channel *dst,
4158 const union tgsi_double_channel *src)
4159 {
4160 dst->u64[0] = (uint64_t)src->d[0];
4161 dst->u64[1] = (uint64_t)src->d[1];
4162 dst->u64[2] = (uint64_t)src->d[2];
4163 dst->u64[3] = (uint64_t)src->d[3];
4164 }
4165
4166 static void
micro_d2i64(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4167 micro_d2i64(union tgsi_double_channel *dst,
4168 const union tgsi_double_channel *src)
4169 {
4170 dst->i64[0] = (int64_t)src->d[0];
4171 dst->i64[1] = (int64_t)src->d[1];
4172 dst->i64[2] = (int64_t)src->d[2];
4173 dst->i64[3] = (int64_t)src->d[3];
4174 }
4175
4176 static void
micro_u642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4177 micro_u642d(union tgsi_double_channel *dst,
4178 const union tgsi_double_channel *src)
4179 {
4180 dst->d[0] = (double)src->u64[0];
4181 dst->d[1] = (double)src->u64[1];
4182 dst->d[2] = (double)src->u64[2];
4183 dst->d[3] = (double)src->u64[3];
4184 }
4185
4186 static void
micro_i642d(union tgsi_double_channel * dst,const union tgsi_double_channel * src)4187 micro_i642d(union tgsi_double_channel *dst,
4188 const union tgsi_double_channel *src)
4189 {
4190 dst->d[0] = (double)src->i64[0];
4191 dst->d[1] = (double)src->i64[1];
4192 dst->d[2] = (double)src->i64[2];
4193 dst->d[3] = (double)src->i64[3];
4194 }
4195
4196 static void
micro_u642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4197 micro_u642f(union tgsi_exec_channel *dst,
4198 const union tgsi_double_channel *src)
4199 {
4200 dst->f[0] = (float)src->u64[0];
4201 dst->f[1] = (float)src->u64[1];
4202 dst->f[2] = (float)src->u64[2];
4203 dst->f[3] = (float)src->u64[3];
4204 }
4205
4206 static void
micro_i642f(union tgsi_exec_channel * dst,const union tgsi_double_channel * src)4207 micro_i642f(union tgsi_exec_channel *dst,
4208 const union tgsi_double_channel *src)
4209 {
4210 dst->f[0] = (float)src->i64[0];
4211 dst->f[1] = (float)src->i64[1];
4212 dst->f[2] = (float)src->i64[2];
4213 dst->f[3] = (float)src->i64[3];
4214 }
4215
4216 static void
exec_t_2_64(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_dop_s op,enum tgsi_exec_datatype src_datatype)4217 exec_t_2_64(struct tgsi_exec_machine *mach,
4218 const struct tgsi_full_instruction *inst,
4219 micro_dop_s op,
4220 enum tgsi_exec_datatype src_datatype)
4221 {
4222 union tgsi_exec_channel src;
4223 union tgsi_double_channel dst;
4224
4225 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_XY) == TGSI_WRITEMASK_XY) {
4226 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_X, src_datatype);
4227 op(&dst, &src);
4228 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_X, TGSI_CHAN_Y);
4229 }
4230 if ((inst->Dst[0].Register.WriteMask & TGSI_WRITEMASK_ZW) == TGSI_WRITEMASK_ZW) {
4231 fetch_source(mach, &src, &inst->Src[0], TGSI_CHAN_Y, src_datatype);
4232 op(&dst, &src);
4233 store_double_channel(mach, &dst, &inst->Dst[0], inst, TGSI_CHAN_Z, TGSI_CHAN_W);
4234 }
4235 }
4236
4237 static void
exec_64_2_t(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,micro_sop_d op)4238 exec_64_2_t(struct tgsi_exec_machine *mach,
4239 const struct tgsi_full_instruction *inst,
4240 micro_sop_d op)
4241 {
4242 union tgsi_double_channel src;
4243 union tgsi_exec_channel dst;
4244 int wm = inst->Dst[0].Register.WriteMask;
4245 int i;
4246 int bit;
4247 for (i = 0; i < 2; i++) {
4248 bit = ffs(wm);
4249 if (bit) {
4250 wm &= ~(1 << (bit - 1));
4251 if (i == 0)
4252 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_X, TGSI_CHAN_Y);
4253 else
4254 fetch_double_channel(mach, &src, &inst->Src[0], TGSI_CHAN_Z, TGSI_CHAN_W);
4255 op(&dst, &src);
4256 store_dest(mach, &dst, &inst->Dst[0], inst, bit - 1);
4257 }
4258 }
4259 }
4260
4261 static void
micro_i2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4262 micro_i2f(union tgsi_exec_channel *dst,
4263 const union tgsi_exec_channel *src)
4264 {
4265 dst->f[0] = (float)src->i[0];
4266 dst->f[1] = (float)src->i[1];
4267 dst->f[2] = (float)src->i[2];
4268 dst->f[3] = (float)src->i[3];
4269 }
4270
4271 static void
micro_not(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4272 micro_not(union tgsi_exec_channel *dst,
4273 const union tgsi_exec_channel *src)
4274 {
4275 dst->u[0] = ~src->u[0];
4276 dst->u[1] = ~src->u[1];
4277 dst->u[2] = ~src->u[2];
4278 dst->u[3] = ~src->u[3];
4279 }
4280
4281 static void
micro_shl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4282 micro_shl(union tgsi_exec_channel *dst,
4283 const union tgsi_exec_channel *src0,
4284 const union tgsi_exec_channel *src1)
4285 {
4286 unsigned masked_count;
4287 masked_count = src1->u[0] & 0x1f;
4288 dst->u[0] = src0->u[0] << masked_count;
4289 masked_count = src1->u[1] & 0x1f;
4290 dst->u[1] = src0->u[1] << masked_count;
4291 masked_count = src1->u[2] & 0x1f;
4292 dst->u[2] = src0->u[2] << masked_count;
4293 masked_count = src1->u[3] & 0x1f;
4294 dst->u[3] = src0->u[3] << masked_count;
4295 }
4296
4297 static void
micro_and(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4298 micro_and(union tgsi_exec_channel *dst,
4299 const union tgsi_exec_channel *src0,
4300 const union tgsi_exec_channel *src1)
4301 {
4302 dst->u[0] = src0->u[0] & src1->u[0];
4303 dst->u[1] = src0->u[1] & src1->u[1];
4304 dst->u[2] = src0->u[2] & src1->u[2];
4305 dst->u[3] = src0->u[3] & src1->u[3];
4306 }
4307
4308 static void
micro_or(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4309 micro_or(union tgsi_exec_channel *dst,
4310 const union tgsi_exec_channel *src0,
4311 const union tgsi_exec_channel *src1)
4312 {
4313 dst->u[0] = src0->u[0] | src1->u[0];
4314 dst->u[1] = src0->u[1] | src1->u[1];
4315 dst->u[2] = src0->u[2] | src1->u[2];
4316 dst->u[3] = src0->u[3] | src1->u[3];
4317 }
4318
4319 static void
micro_xor(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4320 micro_xor(union tgsi_exec_channel *dst,
4321 const union tgsi_exec_channel *src0,
4322 const union tgsi_exec_channel *src1)
4323 {
4324 dst->u[0] = src0->u[0] ^ src1->u[0];
4325 dst->u[1] = src0->u[1] ^ src1->u[1];
4326 dst->u[2] = src0->u[2] ^ src1->u[2];
4327 dst->u[3] = src0->u[3] ^ src1->u[3];
4328 }
4329
4330 static void
micro_mod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4331 micro_mod(union tgsi_exec_channel *dst,
4332 const union tgsi_exec_channel *src0,
4333 const union tgsi_exec_channel *src1)
4334 {
4335 dst->i[0] = src1->i[0] ? src0->i[0] % src1->i[0] : ~0;
4336 dst->i[1] = src1->i[1] ? src0->i[1] % src1->i[1] : ~0;
4337 dst->i[2] = src1->i[2] ? src0->i[2] % src1->i[2] : ~0;
4338 dst->i[3] = src1->i[3] ? src0->i[3] % src1->i[3] : ~0;
4339 }
4340
4341 static void
micro_f2i(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4342 micro_f2i(union tgsi_exec_channel *dst,
4343 const union tgsi_exec_channel *src)
4344 {
4345 dst->i[0] = (int)src->f[0];
4346 dst->i[1] = (int)src->f[1];
4347 dst->i[2] = (int)src->f[2];
4348 dst->i[3] = (int)src->f[3];
4349 }
4350
4351 static void
micro_fseq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4352 micro_fseq(union tgsi_exec_channel *dst,
4353 const union tgsi_exec_channel *src0,
4354 const union tgsi_exec_channel *src1)
4355 {
4356 dst->u[0] = src0->f[0] == src1->f[0] ? ~0 : 0;
4357 dst->u[1] = src0->f[1] == src1->f[1] ? ~0 : 0;
4358 dst->u[2] = src0->f[2] == src1->f[2] ? ~0 : 0;
4359 dst->u[3] = src0->f[3] == src1->f[3] ? ~0 : 0;
4360 }
4361
4362 static void
micro_fsge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4363 micro_fsge(union tgsi_exec_channel *dst,
4364 const union tgsi_exec_channel *src0,
4365 const union tgsi_exec_channel *src1)
4366 {
4367 dst->u[0] = src0->f[0] >= src1->f[0] ? ~0 : 0;
4368 dst->u[1] = src0->f[1] >= src1->f[1] ? ~0 : 0;
4369 dst->u[2] = src0->f[2] >= src1->f[2] ? ~0 : 0;
4370 dst->u[3] = src0->f[3] >= src1->f[3] ? ~0 : 0;
4371 }
4372
4373 static void
micro_fslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4374 micro_fslt(union tgsi_exec_channel *dst,
4375 const union tgsi_exec_channel *src0,
4376 const union tgsi_exec_channel *src1)
4377 {
4378 dst->u[0] = src0->f[0] < src1->f[0] ? ~0 : 0;
4379 dst->u[1] = src0->f[1] < src1->f[1] ? ~0 : 0;
4380 dst->u[2] = src0->f[2] < src1->f[2] ? ~0 : 0;
4381 dst->u[3] = src0->f[3] < src1->f[3] ? ~0 : 0;
4382 }
4383
4384 static void
micro_fsne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4385 micro_fsne(union tgsi_exec_channel *dst,
4386 const union tgsi_exec_channel *src0,
4387 const union tgsi_exec_channel *src1)
4388 {
4389 dst->u[0] = src0->f[0] != src1->f[0] ? ~0 : 0;
4390 dst->u[1] = src0->f[1] != src1->f[1] ? ~0 : 0;
4391 dst->u[2] = src0->f[2] != src1->f[2] ? ~0 : 0;
4392 dst->u[3] = src0->f[3] != src1->f[3] ? ~0 : 0;
4393 }
4394
4395 static void
micro_idiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4396 micro_idiv(union tgsi_exec_channel *dst,
4397 const union tgsi_exec_channel *src0,
4398 const union tgsi_exec_channel *src1)
4399 {
4400 dst->i[0] = src1->i[0] ? src0->i[0] / src1->i[0] : 0;
4401 dst->i[1] = src1->i[1] ? src0->i[1] / src1->i[1] : 0;
4402 dst->i[2] = src1->i[2] ? src0->i[2] / src1->i[2] : 0;
4403 dst->i[3] = src1->i[3] ? src0->i[3] / src1->i[3] : 0;
4404 }
4405
4406 static void
micro_imax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4407 micro_imax(union tgsi_exec_channel *dst,
4408 const union tgsi_exec_channel *src0,
4409 const union tgsi_exec_channel *src1)
4410 {
4411 dst->i[0] = src0->i[0] > src1->i[0] ? src0->i[0] : src1->i[0];
4412 dst->i[1] = src0->i[1] > src1->i[1] ? src0->i[1] : src1->i[1];
4413 dst->i[2] = src0->i[2] > src1->i[2] ? src0->i[2] : src1->i[2];
4414 dst->i[3] = src0->i[3] > src1->i[3] ? src0->i[3] : src1->i[3];
4415 }
4416
4417 static void
micro_imin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4418 micro_imin(union tgsi_exec_channel *dst,
4419 const union tgsi_exec_channel *src0,
4420 const union tgsi_exec_channel *src1)
4421 {
4422 dst->i[0] = src0->i[0] < src1->i[0] ? src0->i[0] : src1->i[0];
4423 dst->i[1] = src0->i[1] < src1->i[1] ? src0->i[1] : src1->i[1];
4424 dst->i[2] = src0->i[2] < src1->i[2] ? src0->i[2] : src1->i[2];
4425 dst->i[3] = src0->i[3] < src1->i[3] ? src0->i[3] : src1->i[3];
4426 }
4427
4428 static void
micro_isge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4429 micro_isge(union tgsi_exec_channel *dst,
4430 const union tgsi_exec_channel *src0,
4431 const union tgsi_exec_channel *src1)
4432 {
4433 dst->i[0] = src0->i[0] >= src1->i[0] ? -1 : 0;
4434 dst->i[1] = src0->i[1] >= src1->i[1] ? -1 : 0;
4435 dst->i[2] = src0->i[2] >= src1->i[2] ? -1 : 0;
4436 dst->i[3] = src0->i[3] >= src1->i[3] ? -1 : 0;
4437 }
4438
4439 static void
micro_ishr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4440 micro_ishr(union tgsi_exec_channel *dst,
4441 const union tgsi_exec_channel *src0,
4442 const union tgsi_exec_channel *src1)
4443 {
4444 unsigned masked_count;
4445 masked_count = src1->i[0] & 0x1f;
4446 dst->i[0] = src0->i[0] >> masked_count;
4447 masked_count = src1->i[1] & 0x1f;
4448 dst->i[1] = src0->i[1] >> masked_count;
4449 masked_count = src1->i[2] & 0x1f;
4450 dst->i[2] = src0->i[2] >> masked_count;
4451 masked_count = src1->i[3] & 0x1f;
4452 dst->i[3] = src0->i[3] >> masked_count;
4453 }
4454
4455 static void
micro_islt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4456 micro_islt(union tgsi_exec_channel *dst,
4457 const union tgsi_exec_channel *src0,
4458 const union tgsi_exec_channel *src1)
4459 {
4460 dst->i[0] = src0->i[0] < src1->i[0] ? -1 : 0;
4461 dst->i[1] = src0->i[1] < src1->i[1] ? -1 : 0;
4462 dst->i[2] = src0->i[2] < src1->i[2] ? -1 : 0;
4463 dst->i[3] = src0->i[3] < src1->i[3] ? -1 : 0;
4464 }
4465
4466 static void
micro_f2u(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4467 micro_f2u(union tgsi_exec_channel *dst,
4468 const union tgsi_exec_channel *src)
4469 {
4470 dst->u[0] = (uint32_t)src->f[0];
4471 dst->u[1] = (uint32_t)src->f[1];
4472 dst->u[2] = (uint32_t)src->f[2];
4473 dst->u[3] = (uint32_t)src->f[3];
4474 }
4475
4476 static void
micro_u2f(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4477 micro_u2f(union tgsi_exec_channel *dst,
4478 const union tgsi_exec_channel *src)
4479 {
4480 dst->f[0] = (float)src->u[0];
4481 dst->f[1] = (float)src->u[1];
4482 dst->f[2] = (float)src->u[2];
4483 dst->f[3] = (float)src->u[3];
4484 }
4485
4486 static void
micro_uadd(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4487 micro_uadd(union tgsi_exec_channel *dst,
4488 const union tgsi_exec_channel *src0,
4489 const union tgsi_exec_channel *src1)
4490 {
4491 dst->u[0] = src0->u[0] + src1->u[0];
4492 dst->u[1] = src0->u[1] + src1->u[1];
4493 dst->u[2] = src0->u[2] + src1->u[2];
4494 dst->u[3] = src0->u[3] + src1->u[3];
4495 }
4496
4497 static void
micro_udiv(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4498 micro_udiv(union tgsi_exec_channel *dst,
4499 const union tgsi_exec_channel *src0,
4500 const union tgsi_exec_channel *src1)
4501 {
4502 dst->u[0] = src1->u[0] ? src0->u[0] / src1->u[0] : ~0u;
4503 dst->u[1] = src1->u[1] ? src0->u[1] / src1->u[1] : ~0u;
4504 dst->u[2] = src1->u[2] ? src0->u[2] / src1->u[2] : ~0u;
4505 dst->u[3] = src1->u[3] ? src0->u[3] / src1->u[3] : ~0u;
4506 }
4507
4508 static void
micro_umad(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4509 micro_umad(union tgsi_exec_channel *dst,
4510 const union tgsi_exec_channel *src0,
4511 const union tgsi_exec_channel *src1,
4512 const union tgsi_exec_channel *src2)
4513 {
4514 dst->u[0] = src0->u[0] * src1->u[0] + src2->u[0];
4515 dst->u[1] = src0->u[1] * src1->u[1] + src2->u[1];
4516 dst->u[2] = src0->u[2] * src1->u[2] + src2->u[2];
4517 dst->u[3] = src0->u[3] * src1->u[3] + src2->u[3];
4518 }
4519
4520 static void
micro_umax(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4521 micro_umax(union tgsi_exec_channel *dst,
4522 const union tgsi_exec_channel *src0,
4523 const union tgsi_exec_channel *src1)
4524 {
4525 dst->u[0] = src0->u[0] > src1->u[0] ? src0->u[0] : src1->u[0];
4526 dst->u[1] = src0->u[1] > src1->u[1] ? src0->u[1] : src1->u[1];
4527 dst->u[2] = src0->u[2] > src1->u[2] ? src0->u[2] : src1->u[2];
4528 dst->u[3] = src0->u[3] > src1->u[3] ? src0->u[3] : src1->u[3];
4529 }
4530
4531 static void
micro_umin(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4532 micro_umin(union tgsi_exec_channel *dst,
4533 const union tgsi_exec_channel *src0,
4534 const union tgsi_exec_channel *src1)
4535 {
4536 dst->u[0] = src0->u[0] < src1->u[0] ? src0->u[0] : src1->u[0];
4537 dst->u[1] = src0->u[1] < src1->u[1] ? src0->u[1] : src1->u[1];
4538 dst->u[2] = src0->u[2] < src1->u[2] ? src0->u[2] : src1->u[2];
4539 dst->u[3] = src0->u[3] < src1->u[3] ? src0->u[3] : src1->u[3];
4540 }
4541
4542 static void
micro_umod(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4543 micro_umod(union tgsi_exec_channel *dst,
4544 const union tgsi_exec_channel *src0,
4545 const union tgsi_exec_channel *src1)
4546 {
4547 dst->u[0] = src1->u[0] ? src0->u[0] % src1->u[0] : ~0u;
4548 dst->u[1] = src1->u[1] ? src0->u[1] % src1->u[1] : ~0u;
4549 dst->u[2] = src1->u[2] ? src0->u[2] % src1->u[2] : ~0u;
4550 dst->u[3] = src1->u[3] ? src0->u[3] % src1->u[3] : ~0u;
4551 }
4552
4553 static void
micro_umul(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4554 micro_umul(union tgsi_exec_channel *dst,
4555 const union tgsi_exec_channel *src0,
4556 const union tgsi_exec_channel *src1)
4557 {
4558 dst->u[0] = src0->u[0] * src1->u[0];
4559 dst->u[1] = src0->u[1] * src1->u[1];
4560 dst->u[2] = src0->u[2] * src1->u[2];
4561 dst->u[3] = src0->u[3] * src1->u[3];
4562 }
4563
4564 static void
micro_imul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4565 micro_imul_hi(union tgsi_exec_channel *dst,
4566 const union tgsi_exec_channel *src0,
4567 const union tgsi_exec_channel *src1)
4568 {
4569 #define I64M(x, y) ((((int64_t)x) * ((int64_t)y)) >> 32)
4570 dst->i[0] = I64M(src0->i[0], src1->i[0]);
4571 dst->i[1] = I64M(src0->i[1], src1->i[1]);
4572 dst->i[2] = I64M(src0->i[2], src1->i[2]);
4573 dst->i[3] = I64M(src0->i[3], src1->i[3]);
4574 #undef I64M
4575 }
4576
4577 static void
micro_umul_hi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4578 micro_umul_hi(union tgsi_exec_channel *dst,
4579 const union tgsi_exec_channel *src0,
4580 const union tgsi_exec_channel *src1)
4581 {
4582 #define U64M(x, y) ((((uint64_t)x) * ((uint64_t)y)) >> 32)
4583 dst->u[0] = U64M(src0->u[0], src1->u[0]);
4584 dst->u[1] = U64M(src0->u[1], src1->u[1]);
4585 dst->u[2] = U64M(src0->u[2], src1->u[2]);
4586 dst->u[3] = U64M(src0->u[3], src1->u[3]);
4587 #undef U64M
4588 }
4589
4590 static void
micro_useq(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4591 micro_useq(union tgsi_exec_channel *dst,
4592 const union tgsi_exec_channel *src0,
4593 const union tgsi_exec_channel *src1)
4594 {
4595 dst->u[0] = src0->u[0] == src1->u[0] ? ~0 : 0;
4596 dst->u[1] = src0->u[1] == src1->u[1] ? ~0 : 0;
4597 dst->u[2] = src0->u[2] == src1->u[2] ? ~0 : 0;
4598 dst->u[3] = src0->u[3] == src1->u[3] ? ~0 : 0;
4599 }
4600
4601 static void
micro_usge(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4602 micro_usge(union tgsi_exec_channel *dst,
4603 const union tgsi_exec_channel *src0,
4604 const union tgsi_exec_channel *src1)
4605 {
4606 dst->u[0] = src0->u[0] >= src1->u[0] ? ~0 : 0;
4607 dst->u[1] = src0->u[1] >= src1->u[1] ? ~0 : 0;
4608 dst->u[2] = src0->u[2] >= src1->u[2] ? ~0 : 0;
4609 dst->u[3] = src0->u[3] >= src1->u[3] ? ~0 : 0;
4610 }
4611
4612 static void
micro_ushr(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4613 micro_ushr(union tgsi_exec_channel *dst,
4614 const union tgsi_exec_channel *src0,
4615 const union tgsi_exec_channel *src1)
4616 {
4617 unsigned masked_count;
4618 masked_count = src1->u[0] & 0x1f;
4619 dst->u[0] = src0->u[0] >> masked_count;
4620 masked_count = src1->u[1] & 0x1f;
4621 dst->u[1] = src0->u[1] >> masked_count;
4622 masked_count = src1->u[2] & 0x1f;
4623 dst->u[2] = src0->u[2] >> masked_count;
4624 masked_count = src1->u[3] & 0x1f;
4625 dst->u[3] = src0->u[3] >> masked_count;
4626 }
4627
4628 static void
micro_uslt(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4629 micro_uslt(union tgsi_exec_channel *dst,
4630 const union tgsi_exec_channel *src0,
4631 const union tgsi_exec_channel *src1)
4632 {
4633 dst->u[0] = src0->u[0] < src1->u[0] ? ~0 : 0;
4634 dst->u[1] = src0->u[1] < src1->u[1] ? ~0 : 0;
4635 dst->u[2] = src0->u[2] < src1->u[2] ? ~0 : 0;
4636 dst->u[3] = src0->u[3] < src1->u[3] ? ~0 : 0;
4637 }
4638
4639 static void
micro_usne(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1)4640 micro_usne(union tgsi_exec_channel *dst,
4641 const union tgsi_exec_channel *src0,
4642 const union tgsi_exec_channel *src1)
4643 {
4644 dst->u[0] = src0->u[0] != src1->u[0] ? ~0 : 0;
4645 dst->u[1] = src0->u[1] != src1->u[1] ? ~0 : 0;
4646 dst->u[2] = src0->u[2] != src1->u[2] ? ~0 : 0;
4647 dst->u[3] = src0->u[3] != src1->u[3] ? ~0 : 0;
4648 }
4649
4650 static void
micro_uarl(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4651 micro_uarl(union tgsi_exec_channel *dst,
4652 const union tgsi_exec_channel *src)
4653 {
4654 dst->i[0] = src->u[0];
4655 dst->i[1] = src->u[1];
4656 dst->i[2] = src->u[2];
4657 dst->i[3] = src->u[3];
4658 }
4659
4660 /**
4661 * Signed bitfield extract (i.e. sign-extend the extracted bits)
4662 */
4663 static void
micro_ibfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4664 micro_ibfe(union tgsi_exec_channel *dst,
4665 const union tgsi_exec_channel *src0,
4666 const union tgsi_exec_channel *src1,
4667 const union tgsi_exec_channel *src2)
4668 {
4669 int i;
4670 for (i = 0; i < 4; i++) {
4671 int width = src2->i[i];
4672 int offset = src1->i[i] & 0x1f;
4673 if (width == 32 && offset == 0) {
4674 dst->i[i] = src0->i[i];
4675 continue;
4676 }
4677 width &= 0x1f;
4678 if (width == 0)
4679 dst->i[i] = 0;
4680 else if (width + offset < 32)
4681 dst->i[i] = (src0->i[i] << (32 - width - offset)) >> (32 - width);
4682 else
4683 dst->i[i] = src0->i[i] >> offset;
4684 }
4685 }
4686
4687 /**
4688 * Unsigned bitfield extract
4689 */
4690 static void
micro_ubfe(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2)4691 micro_ubfe(union tgsi_exec_channel *dst,
4692 const union tgsi_exec_channel *src0,
4693 const union tgsi_exec_channel *src1,
4694 const union tgsi_exec_channel *src2)
4695 {
4696 int i;
4697 for (i = 0; i < 4; i++) {
4698 int width = src2->u[i];
4699 int offset = src1->u[i] & 0x1f;
4700 if (width == 32 && offset == 0) {
4701 dst->u[i] = src0->u[i];
4702 continue;
4703 }
4704 width &= 0x1f;
4705 if (width == 0)
4706 dst->u[i] = 0;
4707 else if (width + offset < 32)
4708 dst->u[i] = (src0->u[i] << (32 - width - offset)) >> (32 - width);
4709 else
4710 dst->u[i] = src0->u[i] >> offset;
4711 }
4712 }
4713
4714 /**
4715 * Bitfield insert: copy low bits from src1 into a region of src0.
4716 */
4717 static void
micro_bfi(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src0,const union tgsi_exec_channel * src1,const union tgsi_exec_channel * src2,const union tgsi_exec_channel * src3)4718 micro_bfi(union tgsi_exec_channel *dst,
4719 const union tgsi_exec_channel *src0,
4720 const union tgsi_exec_channel *src1,
4721 const union tgsi_exec_channel *src2,
4722 const union tgsi_exec_channel *src3)
4723 {
4724 int i;
4725 for (i = 0; i < 4; i++) {
4726 int width = src3->u[i];
4727 int offset = src2->u[i] & 0x1f;
4728 if (width == 32) {
4729 dst->u[i] = src1->u[i];
4730 } else {
4731 int bitmask = ((1 << width) - 1) << offset;
4732 dst->u[i] = ((src1->u[i] << offset) & bitmask) | (src0->u[i] & ~bitmask);
4733 }
4734 }
4735 }
4736
4737 static void
micro_brev(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4738 micro_brev(union tgsi_exec_channel *dst,
4739 const union tgsi_exec_channel *src)
4740 {
4741 dst->u[0] = util_bitreverse(src->u[0]);
4742 dst->u[1] = util_bitreverse(src->u[1]);
4743 dst->u[2] = util_bitreverse(src->u[2]);
4744 dst->u[3] = util_bitreverse(src->u[3]);
4745 }
4746
4747 static void
micro_popc(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4748 micro_popc(union tgsi_exec_channel *dst,
4749 const union tgsi_exec_channel *src)
4750 {
4751 dst->u[0] = util_bitcount(src->u[0]);
4752 dst->u[1] = util_bitcount(src->u[1]);
4753 dst->u[2] = util_bitcount(src->u[2]);
4754 dst->u[3] = util_bitcount(src->u[3]);
4755 }
4756
4757 static void
micro_lsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4758 micro_lsb(union tgsi_exec_channel *dst,
4759 const union tgsi_exec_channel *src)
4760 {
4761 dst->i[0] = ffs(src->u[0]) - 1;
4762 dst->i[1] = ffs(src->u[1]) - 1;
4763 dst->i[2] = ffs(src->u[2]) - 1;
4764 dst->i[3] = ffs(src->u[3]) - 1;
4765 }
4766
4767 static void
micro_imsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4768 micro_imsb(union tgsi_exec_channel *dst,
4769 const union tgsi_exec_channel *src)
4770 {
4771 dst->i[0] = util_last_bit_signed(src->i[0]) - 1;
4772 dst->i[1] = util_last_bit_signed(src->i[1]) - 1;
4773 dst->i[2] = util_last_bit_signed(src->i[2]) - 1;
4774 dst->i[3] = util_last_bit_signed(src->i[3]) - 1;
4775 }
4776
4777 static void
micro_umsb(union tgsi_exec_channel * dst,const union tgsi_exec_channel * src)4778 micro_umsb(union tgsi_exec_channel *dst,
4779 const union tgsi_exec_channel *src)
4780 {
4781 dst->i[0] = util_last_bit(src->u[0]) - 1;
4782 dst->i[1] = util_last_bit(src->u[1]) - 1;
4783 dst->i[2] = util_last_bit(src->u[2]) - 1;
4784 dst->i[3] = util_last_bit(src->u[3]) - 1;
4785 }
4786
4787
4788 static void
exec_interp_at_sample(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4789 exec_interp_at_sample(struct tgsi_exec_machine *mach,
4790 const struct tgsi_full_instruction *inst)
4791 {
4792 union tgsi_exec_channel index;
4793 union tgsi_exec_channel index2D;
4794 union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4795 const struct tgsi_full_src_register *reg = &inst->Src[0];
4796
4797 assert(reg->Register.File == TGSI_FILE_INPUT);
4798 assert(inst->Src[1].Register.File == TGSI_FILE_IMMEDIATE);
4799
4800 get_index_registers(mach, reg, &index, &index2D);
4801 float sample = mach->Imms[inst->Src[1].Register.Index][inst->Src[1].Register.SwizzleX];
4802
4803 /* Short cut: sample 0 is like a normal fetch */
4804 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4805 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4806 continue;
4807
4808 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4809 &result[chan]);
4810 if (sample != 0.0f) {
4811
4812 /* TODO: define the samples > 0, but so far we only do fake MSAA */
4813 float x = 0;
4814 float y = 0;
4815
4816 unsigned pos = index2D.i[chan] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[chan];
4817 assert(pos >= 0);
4818 assert(pos < TGSI_MAX_PRIM_VERTICES * PIPE_MAX_ATTRIBS);
4819 mach->InputSampleOffsetApply[pos](mach, pos, chan, x, y, &result[chan]);
4820 }
4821 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4822 }
4823 }
4824
4825
4826 static void
exec_interp_at_offset(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4827 exec_interp_at_offset(struct tgsi_exec_machine *mach,
4828 const struct tgsi_full_instruction *inst)
4829 {
4830 union tgsi_exec_channel index;
4831 union tgsi_exec_channel index2D;
4832 union tgsi_exec_channel ofsx;
4833 union tgsi_exec_channel ofsy;
4834 const struct tgsi_full_src_register *reg = &inst->Src[0];
4835
4836 assert(reg->Register.File == TGSI_FILE_INPUT);
4837
4838 get_index_registers(mach, reg, &index, &index2D);
4839 unsigned pos = index2D.i[0] * TGSI_EXEC_MAX_INPUT_ATTRIBS + index.i[0];
4840
4841 fetch_source(mach, &ofsx, &inst->Src[1], TGSI_CHAN_X, TGSI_EXEC_DATA_FLOAT);
4842 fetch_source(mach, &ofsy, &inst->Src[1], TGSI_CHAN_Y, TGSI_EXEC_DATA_FLOAT);
4843
4844 for (int chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4845 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4846 continue;
4847 union tgsi_exec_channel result;
4848 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D, &result);
4849 mach->InputSampleOffsetApply[pos](mach, pos, chan, ofsx.f[chan], ofsy.f[chan], &result);
4850 store_dest(mach, &result, &inst->Dst[0], inst, chan);
4851 }
4852 }
4853
4854
4855 static void
exec_interp_at_centroid(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst)4856 exec_interp_at_centroid(struct tgsi_exec_machine *mach,
4857 const struct tgsi_full_instruction *inst)
4858 {
4859 union tgsi_exec_channel index;
4860 union tgsi_exec_channel index2D;
4861 union tgsi_exec_channel result[TGSI_NUM_CHANNELS];
4862 const struct tgsi_full_src_register *reg = &inst->Src[0];
4863
4864 assert(reg->Register.File == TGSI_FILE_INPUT);
4865 get_index_registers(mach, reg, &index, &index2D);
4866
4867 for (unsigned chan = 0; chan < TGSI_NUM_CHANNELS; chan++) {
4868 if (!(inst->Dst[0].Register.WriteMask & (1 << chan)))
4869 continue;
4870
4871 /* Here we should add the change to use a sample that lies within the
4872 * primitive (Section 15.2):
4873 *
4874 * "When interpolating variables declared using centroid in ,
4875 * the variable is sampled at a location within the pixel covered
4876 * by the primitive generating the fragment.
4877 * ...
4878 * The built-in functions interpolateAtCentroid ... will sample
4879 * variables as though they were declared with the centroid ...
4880 * qualifier[s]."
4881 *
4882 * Since we only support 1 sample currently, this is just a pass-through.
4883 */
4884 fetch_src_file_channel(mach, TGSI_FILE_INPUT, chan, &index, &index2D,
4885 &result[chan]);
4886 store_dest(mach, &result[chan], &inst->Dst[0], inst, chan);
4887 }
4888
4889 }
4890
4891
4892 /**
4893 * Execute a TGSI instruction.
4894 * Returns TRUE if a barrier instruction is hit,
4895 * otherwise FALSE.
4896 */
4897 static bool
exec_instruction(struct tgsi_exec_machine * mach,const struct tgsi_full_instruction * inst,int * pc)4898 exec_instruction(
4899 struct tgsi_exec_machine *mach,
4900 const struct tgsi_full_instruction *inst,
4901 int *pc )
4902 {
4903 union tgsi_exec_channel r[10];
4904
4905 (*pc)++;
4906
4907 switch (inst->Instruction.Opcode) {
4908 case TGSI_OPCODE_ARL:
4909 exec_vector_unary(mach, inst, micro_arl, TGSI_EXEC_DATA_FLOAT);
4910 break;
4911
4912 case TGSI_OPCODE_MOV:
4913 exec_vector_unary(mach, inst, micro_mov, TGSI_EXEC_DATA_FLOAT);
4914 break;
4915
4916 case TGSI_OPCODE_LIT:
4917 exec_lit(mach, inst);
4918 break;
4919
4920 case TGSI_OPCODE_RCP:
4921 exec_scalar_unary(mach, inst, micro_rcp, TGSI_EXEC_DATA_FLOAT);
4922 break;
4923
4924 case TGSI_OPCODE_RSQ:
4925 exec_scalar_unary(mach, inst, micro_rsq, TGSI_EXEC_DATA_FLOAT);
4926 break;
4927
4928 case TGSI_OPCODE_EXP:
4929 exec_exp(mach, inst);
4930 break;
4931
4932 case TGSI_OPCODE_LOG:
4933 exec_log(mach, inst);
4934 break;
4935
4936 case TGSI_OPCODE_MUL:
4937 exec_vector_binary(mach, inst, micro_mul, TGSI_EXEC_DATA_FLOAT);
4938 break;
4939
4940 case TGSI_OPCODE_ADD:
4941 exec_vector_binary(mach, inst, micro_add, TGSI_EXEC_DATA_FLOAT);
4942 break;
4943
4944 case TGSI_OPCODE_DP3:
4945 exec_dp3(mach, inst);
4946 break;
4947
4948 case TGSI_OPCODE_DP4:
4949 exec_dp4(mach, inst);
4950 break;
4951
4952 case TGSI_OPCODE_DST:
4953 exec_dst(mach, inst);
4954 break;
4955
4956 case TGSI_OPCODE_MIN:
4957 exec_vector_binary(mach, inst, micro_min, TGSI_EXEC_DATA_FLOAT);
4958 break;
4959
4960 case TGSI_OPCODE_MAX:
4961 exec_vector_binary(mach, inst, micro_max, TGSI_EXEC_DATA_FLOAT);
4962 break;
4963
4964 case TGSI_OPCODE_SLT:
4965 exec_vector_binary(mach, inst, micro_slt, TGSI_EXEC_DATA_FLOAT);
4966 break;
4967
4968 case TGSI_OPCODE_SGE:
4969 exec_vector_binary(mach, inst, micro_sge, TGSI_EXEC_DATA_FLOAT);
4970 break;
4971
4972 case TGSI_OPCODE_MAD:
4973 exec_vector_trinary(mach, inst, micro_mad, TGSI_EXEC_DATA_FLOAT);
4974 break;
4975
4976 case TGSI_OPCODE_LRP:
4977 exec_vector_trinary(mach, inst, micro_lrp, TGSI_EXEC_DATA_FLOAT);
4978 break;
4979
4980 case TGSI_OPCODE_SQRT:
4981 exec_scalar_unary(mach, inst, micro_sqrt, TGSI_EXEC_DATA_FLOAT);
4982 break;
4983
4984 case TGSI_OPCODE_FRC:
4985 exec_vector_unary(mach, inst, micro_frc, TGSI_EXEC_DATA_FLOAT);
4986 break;
4987
4988 case TGSI_OPCODE_FLR:
4989 exec_vector_unary(mach, inst, micro_flr, TGSI_EXEC_DATA_FLOAT);
4990 break;
4991
4992 case TGSI_OPCODE_ROUND:
4993 exec_vector_unary(mach, inst, micro_rnd, TGSI_EXEC_DATA_FLOAT);
4994 break;
4995
4996 case TGSI_OPCODE_EX2:
4997 exec_scalar_unary(mach, inst, micro_exp2, TGSI_EXEC_DATA_FLOAT);
4998 break;
4999
5000 case TGSI_OPCODE_LG2:
5001 exec_scalar_unary(mach, inst, micro_lg2, TGSI_EXEC_DATA_FLOAT);
5002 break;
5003
5004 case TGSI_OPCODE_POW:
5005 exec_scalar_binary(mach, inst, micro_pow, TGSI_EXEC_DATA_FLOAT);
5006 break;
5007
5008 case TGSI_OPCODE_LDEXP:
5009 exec_vector_binary(mach, inst, micro_ldexp, TGSI_EXEC_DATA_FLOAT);
5010 break;
5011
5012 case TGSI_OPCODE_COS:
5013 exec_scalar_unary(mach, inst, micro_cos, TGSI_EXEC_DATA_FLOAT);
5014 break;
5015
5016 case TGSI_OPCODE_DDX_FINE:
5017 exec_vector_unary(mach, inst, micro_ddx_fine, TGSI_EXEC_DATA_FLOAT);
5018 break;
5019
5020 case TGSI_OPCODE_DDX:
5021 exec_vector_unary(mach, inst, micro_ddx, TGSI_EXEC_DATA_FLOAT);
5022 break;
5023
5024 case TGSI_OPCODE_DDY_FINE:
5025 exec_vector_unary(mach, inst, micro_ddy_fine, TGSI_EXEC_DATA_FLOAT);
5026 break;
5027
5028 case TGSI_OPCODE_DDY:
5029 exec_vector_unary(mach, inst, micro_ddy, TGSI_EXEC_DATA_FLOAT);
5030 break;
5031
5032 case TGSI_OPCODE_KILL:
5033 exec_kill (mach);
5034 break;
5035
5036 case TGSI_OPCODE_KILL_IF:
5037 exec_kill_if (mach, inst);
5038 break;
5039
5040 case TGSI_OPCODE_PK2H:
5041 exec_pk2h(mach, inst);
5042 break;
5043
5044 case TGSI_OPCODE_PK2US:
5045 assert (0);
5046 break;
5047
5048 case TGSI_OPCODE_PK4B:
5049 assert (0);
5050 break;
5051
5052 case TGSI_OPCODE_PK4UB:
5053 assert (0);
5054 break;
5055
5056 case TGSI_OPCODE_SEQ:
5057 exec_vector_binary(mach, inst, micro_seq, TGSI_EXEC_DATA_FLOAT);
5058 break;
5059
5060 case TGSI_OPCODE_SGT:
5061 exec_vector_binary(mach, inst, micro_sgt, TGSI_EXEC_DATA_FLOAT);
5062 break;
5063
5064 case TGSI_OPCODE_SIN:
5065 exec_scalar_unary(mach, inst, micro_sin, TGSI_EXEC_DATA_FLOAT);
5066 break;
5067
5068 case TGSI_OPCODE_SLE:
5069 exec_vector_binary(mach, inst, micro_sle, TGSI_EXEC_DATA_FLOAT);
5070 break;
5071
5072 case TGSI_OPCODE_SNE:
5073 exec_vector_binary(mach, inst, micro_sne, TGSI_EXEC_DATA_FLOAT);
5074 break;
5075
5076 case TGSI_OPCODE_TEX:
5077 /* simple texture lookup */
5078 /* src[0] = texcoord */
5079 /* src[1] = sampler unit */
5080 exec_tex(mach, inst, TEX_MODIFIER_NONE, 1);
5081 break;
5082
5083 case TGSI_OPCODE_TXB:
5084 /* Texture lookup with lod bias */
5085 /* src[0] = texcoord (src[0].w = LOD bias) */
5086 /* src[1] = sampler unit */
5087 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 1);
5088 break;
5089
5090 case TGSI_OPCODE_TXD:
5091 /* Texture lookup with explict partial derivatives */
5092 /* src[0] = texcoord */
5093 /* src[1] = d[strq]/dx */
5094 /* src[2] = d[strq]/dy */
5095 /* src[3] = sampler unit */
5096 exec_txd(mach, inst);
5097 break;
5098
5099 case TGSI_OPCODE_TXL:
5100 /* Texture lookup with explit LOD */
5101 /* src[0] = texcoord (src[0].w = LOD) */
5102 /* src[1] = sampler unit */
5103 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 1);
5104 break;
5105
5106 case TGSI_OPCODE_TXP:
5107 /* Texture lookup with projection */
5108 /* src[0] = texcoord (src[0].w = projection) */
5109 /* src[1] = sampler unit */
5110 exec_tex(mach, inst, TEX_MODIFIER_PROJECTED, 1);
5111 break;
5112
5113 case TGSI_OPCODE_TG4:
5114 /* src[0] = texcoord */
5115 /* src[1] = component */
5116 /* src[2] = sampler unit */
5117 exec_tex(mach, inst, TEX_MODIFIER_GATHER, 2);
5118 break;
5119
5120 case TGSI_OPCODE_LODQ:
5121 /* src[0] = texcoord */
5122 /* src[1] = sampler unit */
5123 exec_lodq(mach, inst);
5124 break;
5125
5126 case TGSI_OPCODE_UP2H:
5127 exec_up2h(mach, inst);
5128 break;
5129
5130 case TGSI_OPCODE_UP2US:
5131 assert (0);
5132 break;
5133
5134 case TGSI_OPCODE_UP4B:
5135 assert (0);
5136 break;
5137
5138 case TGSI_OPCODE_UP4UB:
5139 assert (0);
5140 break;
5141
5142 case TGSI_OPCODE_ARR:
5143 exec_vector_unary(mach, inst, micro_arr, TGSI_EXEC_DATA_FLOAT);
5144 break;
5145
5146 case TGSI_OPCODE_CAL:
5147 /* skip the call if no execution channels are enabled */
5148 if (mach->ExecMask) {
5149 /* do the call */
5150
5151 /* First, record the depths of the execution stacks.
5152 * This is important for deeply nested/looped return statements.
5153 * We have to unwind the stacks by the correct amount. For a
5154 * real code generator, we could determine the number of entries
5155 * to pop off each stack with simple static analysis and avoid
5156 * implementing this data structure at run time.
5157 */
5158 mach->CallStack[mach->CallStackTop].CondStackTop = mach->CondStackTop;
5159 mach->CallStack[mach->CallStackTop].LoopStackTop = mach->LoopStackTop;
5160 mach->CallStack[mach->CallStackTop].ContStackTop = mach->ContStackTop;
5161 mach->CallStack[mach->CallStackTop].SwitchStackTop = mach->SwitchStackTop;
5162 mach->CallStack[mach->CallStackTop].BreakStackTop = mach->BreakStackTop;
5163 /* note that PC was already incremented above */
5164 mach->CallStack[mach->CallStackTop].ReturnAddr = *pc;
5165
5166 mach->CallStackTop++;
5167
5168 /* Second, push the Cond, Loop, Cont, Func stacks */
5169 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5170 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5171 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5172 assert(mach->SwitchStackTop < TGSI_EXEC_MAX_SWITCH_NESTING);
5173 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5174 assert(mach->FuncStackTop < TGSI_EXEC_MAX_CALL_NESTING);
5175
5176 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5177 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5178 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5179 mach->SwitchStack[mach->SwitchStackTop++] = mach->Switch;
5180 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5181 mach->FuncStack[mach->FuncStackTop++] = mach->FuncMask;
5182
5183 /* Finally, jump to the subroutine. The label is a pointer
5184 * (an instruction number) to the BGNSUB instruction.
5185 */
5186 *pc = inst->Label.Label;
5187 assert(mach->Instructions[*pc].Instruction.Opcode
5188 == TGSI_OPCODE_BGNSUB);
5189 }
5190 break;
5191
5192 case TGSI_OPCODE_RET:
5193 mach->FuncMask &= ~mach->ExecMask;
5194 UPDATE_EXEC_MASK(mach);
5195
5196 if (mach->FuncMask == 0x0) {
5197 /* really return now (otherwise, keep executing */
5198
5199 if (mach->CallStackTop == 0) {
5200 /* returning from main() */
5201 mach->CondStackTop = 0;
5202 mach->LoopStackTop = 0;
5203 mach->ContStackTop = 0;
5204 mach->LoopLabelStackTop = 0;
5205 mach->SwitchStackTop = 0;
5206 mach->BreakStackTop = 0;
5207 *pc = -1;
5208 return false;
5209 }
5210
5211 assert(mach->CallStackTop > 0);
5212 mach->CallStackTop--;
5213
5214 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5215 mach->CondMask = mach->CondStack[mach->CondStackTop];
5216
5217 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5218 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5219
5220 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5221 mach->ContMask = mach->ContStack[mach->ContStackTop];
5222
5223 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5224 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5225
5226 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5227 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5228
5229 assert(mach->FuncStackTop > 0);
5230 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5231
5232 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5233
5234 UPDATE_EXEC_MASK(mach);
5235 }
5236 break;
5237
5238 case TGSI_OPCODE_SSG:
5239 exec_vector_unary(mach, inst, micro_sgn, TGSI_EXEC_DATA_FLOAT);
5240 break;
5241
5242 case TGSI_OPCODE_CMP:
5243 exec_vector_trinary(mach, inst, micro_cmp, TGSI_EXEC_DATA_FLOAT);
5244 break;
5245
5246 case TGSI_OPCODE_DIV:
5247 exec_vector_binary(mach, inst, micro_div, TGSI_EXEC_DATA_FLOAT);
5248 break;
5249
5250 case TGSI_OPCODE_DP2:
5251 exec_dp2(mach, inst);
5252 break;
5253
5254 case TGSI_OPCODE_IF:
5255 /* push CondMask */
5256 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5257 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5258 FETCH( &r[0], 0, TGSI_CHAN_X );
5259 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
5260 if (!r[0].f[i])
5261 mach->CondMask &= ~(1 << i);
5262 }
5263 UPDATE_EXEC_MASK(mach);
5264 /* If no channels are taking the then branch, jump to ELSE. */
5265 if (!mach->CondMask)
5266 *pc = inst->Label.Label;
5267 break;
5268
5269 case TGSI_OPCODE_UIF:
5270 /* push CondMask */
5271 assert(mach->CondStackTop < TGSI_EXEC_MAX_COND_NESTING);
5272 mach->CondStack[mach->CondStackTop++] = mach->CondMask;
5273 IFETCH( &r[0], 0, TGSI_CHAN_X );
5274 for (int i = 0; i < TGSI_QUAD_SIZE; i++) {
5275 if (!r[0].u[i])
5276 mach->CondMask &= ~(1 << i);
5277 }
5278 UPDATE_EXEC_MASK(mach);
5279 /* If no channels are taking the then branch, jump to ELSE. */
5280 if (!mach->CondMask)
5281 *pc = inst->Label.Label;
5282 break;
5283
5284 case TGSI_OPCODE_ELSE:
5285 /* invert CondMask wrt previous mask */
5286 {
5287 unsigned prevMask;
5288 assert(mach->CondStackTop > 0);
5289 prevMask = mach->CondStack[mach->CondStackTop - 1];
5290 mach->CondMask = ~mach->CondMask & prevMask;
5291 UPDATE_EXEC_MASK(mach);
5292
5293 /* If no channels are taking ELSE, jump to ENDIF */
5294 if (!mach->CondMask)
5295 *pc = inst->Label.Label;
5296 }
5297 break;
5298
5299 case TGSI_OPCODE_ENDIF:
5300 /* pop CondMask */
5301 assert(mach->CondStackTop > 0);
5302 mach->CondMask = mach->CondStack[--mach->CondStackTop];
5303 UPDATE_EXEC_MASK(mach);
5304 break;
5305
5306 case TGSI_OPCODE_END:
5307 /* make sure we end primitives which haven't
5308 * been explicitly emitted */
5309 conditional_emit_primitive(mach);
5310 /* halt execution */
5311 *pc = -1;
5312 break;
5313
5314 case TGSI_OPCODE_CEIL:
5315 exec_vector_unary(mach, inst, micro_ceil, TGSI_EXEC_DATA_FLOAT);
5316 break;
5317
5318 case TGSI_OPCODE_I2F:
5319 exec_vector_unary(mach, inst, micro_i2f, TGSI_EXEC_DATA_INT);
5320 break;
5321
5322 case TGSI_OPCODE_NOT:
5323 exec_vector_unary(mach, inst, micro_not, TGSI_EXEC_DATA_UINT);
5324 break;
5325
5326 case TGSI_OPCODE_TRUNC:
5327 exec_vector_unary(mach, inst, micro_trunc, TGSI_EXEC_DATA_FLOAT);
5328 break;
5329
5330 case TGSI_OPCODE_SHL:
5331 exec_vector_binary(mach, inst, micro_shl, TGSI_EXEC_DATA_UINT);
5332 break;
5333
5334 case TGSI_OPCODE_AND:
5335 exec_vector_binary(mach, inst, micro_and, TGSI_EXEC_DATA_UINT);
5336 break;
5337
5338 case TGSI_OPCODE_OR:
5339 exec_vector_binary(mach, inst, micro_or, TGSI_EXEC_DATA_UINT);
5340 break;
5341
5342 case TGSI_OPCODE_MOD:
5343 exec_vector_binary(mach, inst, micro_mod, TGSI_EXEC_DATA_INT);
5344 break;
5345
5346 case TGSI_OPCODE_XOR:
5347 exec_vector_binary(mach, inst, micro_xor, TGSI_EXEC_DATA_UINT);
5348 break;
5349
5350 case TGSI_OPCODE_TXF:
5351 exec_txf(mach, inst);
5352 break;
5353
5354 case TGSI_OPCODE_TXQ:
5355 exec_txq(mach, inst);
5356 break;
5357
5358 case TGSI_OPCODE_EMIT:
5359 emit_vertex(mach, inst);
5360 break;
5361
5362 case TGSI_OPCODE_ENDPRIM:
5363 emit_primitive(mach, inst);
5364 break;
5365
5366 case TGSI_OPCODE_BGNLOOP:
5367 /* push LoopMask and ContMasks */
5368 assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5369 assert(mach->ContStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5370 assert(mach->LoopLabelStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
5371 assert(mach->BreakStackTop < TGSI_EXEC_MAX_BREAK_STACK);
5372
5373 mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
5374 mach->ContStack[mach->ContStackTop++] = mach->ContMask;
5375 mach->LoopLabelStack[mach->LoopLabelStackTop++] = *pc - 1;
5376 mach->BreakStack[mach->BreakStackTop++] = mach->BreakType;
5377 mach->BreakType = TGSI_EXEC_BREAK_INSIDE_LOOP;
5378 break;
5379
5380 case TGSI_OPCODE_ENDLOOP:
5381 /* Restore ContMask, but don't pop */
5382 assert(mach->ContStackTop > 0);
5383 mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
5384 UPDATE_EXEC_MASK(mach);
5385 if (mach->ExecMask) {
5386 /* repeat loop: jump to instruction just past BGNLOOP */
5387 assert(mach->LoopLabelStackTop > 0);
5388 *pc = mach->LoopLabelStack[mach->LoopLabelStackTop - 1] + 1;
5389 }
5390 else {
5391 /* exit loop: pop LoopMask */
5392 assert(mach->LoopStackTop > 0);
5393 mach->LoopMask = mach->LoopStack[--mach->LoopStackTop];
5394 /* pop ContMask */
5395 assert(mach->ContStackTop > 0);
5396 mach->ContMask = mach->ContStack[--mach->ContStackTop];
5397 assert(mach->LoopLabelStackTop > 0);
5398 --mach->LoopLabelStackTop;
5399
5400 mach->BreakType = mach->BreakStack[--mach->BreakStackTop];
5401 }
5402 UPDATE_EXEC_MASK(mach);
5403 break;
5404
5405 case TGSI_OPCODE_BRK:
5406 exec_break(mach);
5407 break;
5408
5409 case TGSI_OPCODE_CONT:
5410 /* turn off cont channels for each enabled exec channel */
5411 mach->ContMask &= ~mach->ExecMask;
5412 /* Todo: if mach->LoopMask == 0, jump to end of loop */
5413 UPDATE_EXEC_MASK(mach);
5414 break;
5415
5416 case TGSI_OPCODE_BGNSUB:
5417 /* no-op */
5418 break;
5419
5420 case TGSI_OPCODE_ENDSUB:
5421 /*
5422 * XXX: This really should be a no-op. We should never reach this opcode.
5423 */
5424
5425 assert(mach->CallStackTop > 0);
5426 mach->CallStackTop--;
5427
5428 mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
5429 mach->CondMask = mach->CondStack[mach->CondStackTop];
5430
5431 mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
5432 mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
5433
5434 mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
5435 mach->ContMask = mach->ContStack[mach->ContStackTop];
5436
5437 mach->SwitchStackTop = mach->CallStack[mach->CallStackTop].SwitchStackTop;
5438 mach->Switch = mach->SwitchStack[mach->SwitchStackTop];
5439
5440 mach->BreakStackTop = mach->CallStack[mach->CallStackTop].BreakStackTop;
5441 mach->BreakType = mach->BreakStack[mach->BreakStackTop];
5442
5443 assert(mach->FuncStackTop > 0);
5444 mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
5445
5446 *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
5447
5448 UPDATE_EXEC_MASK(mach);
5449 break;
5450
5451 case TGSI_OPCODE_NOP:
5452 break;
5453
5454 case TGSI_OPCODE_F2I:
5455 exec_vector_unary(mach, inst, micro_f2i, TGSI_EXEC_DATA_FLOAT);
5456 break;
5457
5458 case TGSI_OPCODE_FSEQ:
5459 exec_vector_binary(mach, inst, micro_fseq, TGSI_EXEC_DATA_FLOAT);
5460 break;
5461
5462 case TGSI_OPCODE_FSGE:
5463 exec_vector_binary(mach, inst, micro_fsge, TGSI_EXEC_DATA_FLOAT);
5464 break;
5465
5466 case TGSI_OPCODE_FSLT:
5467 exec_vector_binary(mach, inst, micro_fslt, TGSI_EXEC_DATA_FLOAT);
5468 break;
5469
5470 case TGSI_OPCODE_FSNE:
5471 exec_vector_binary(mach, inst, micro_fsne, TGSI_EXEC_DATA_FLOAT);
5472 break;
5473
5474 case TGSI_OPCODE_IDIV:
5475 exec_vector_binary(mach, inst, micro_idiv, TGSI_EXEC_DATA_INT);
5476 break;
5477
5478 case TGSI_OPCODE_IMAX:
5479 exec_vector_binary(mach, inst, micro_imax, TGSI_EXEC_DATA_INT);
5480 break;
5481
5482 case TGSI_OPCODE_IMIN:
5483 exec_vector_binary(mach, inst, micro_imin, TGSI_EXEC_DATA_INT);
5484 break;
5485
5486 case TGSI_OPCODE_INEG:
5487 exec_vector_unary(mach, inst, micro_ineg, TGSI_EXEC_DATA_INT);
5488 break;
5489
5490 case TGSI_OPCODE_ISGE:
5491 exec_vector_binary(mach, inst, micro_isge, TGSI_EXEC_DATA_INT);
5492 break;
5493
5494 case TGSI_OPCODE_ISHR:
5495 exec_vector_binary(mach, inst, micro_ishr, TGSI_EXEC_DATA_INT);
5496 break;
5497
5498 case TGSI_OPCODE_ISLT:
5499 exec_vector_binary(mach, inst, micro_islt, TGSI_EXEC_DATA_INT);
5500 break;
5501
5502 case TGSI_OPCODE_F2U:
5503 exec_vector_unary(mach, inst, micro_f2u, TGSI_EXEC_DATA_FLOAT);
5504 break;
5505
5506 case TGSI_OPCODE_U2F:
5507 exec_vector_unary(mach, inst, micro_u2f, TGSI_EXEC_DATA_UINT);
5508 break;
5509
5510 case TGSI_OPCODE_UADD:
5511 exec_vector_binary(mach, inst, micro_uadd, TGSI_EXEC_DATA_INT);
5512 break;
5513
5514 case TGSI_OPCODE_UDIV:
5515 exec_vector_binary(mach, inst, micro_udiv, TGSI_EXEC_DATA_UINT);
5516 break;
5517
5518 case TGSI_OPCODE_UMAD:
5519 exec_vector_trinary(mach, inst, micro_umad, TGSI_EXEC_DATA_UINT);
5520 break;
5521
5522 case TGSI_OPCODE_UMAX:
5523 exec_vector_binary(mach, inst, micro_umax, TGSI_EXEC_DATA_UINT);
5524 break;
5525
5526 case TGSI_OPCODE_UMIN:
5527 exec_vector_binary(mach, inst, micro_umin, TGSI_EXEC_DATA_UINT);
5528 break;
5529
5530 case TGSI_OPCODE_UMOD:
5531 exec_vector_binary(mach, inst, micro_umod, TGSI_EXEC_DATA_UINT);
5532 break;
5533
5534 case TGSI_OPCODE_UMUL:
5535 exec_vector_binary(mach, inst, micro_umul, TGSI_EXEC_DATA_UINT);
5536 break;
5537
5538 case TGSI_OPCODE_IMUL_HI:
5539 exec_vector_binary(mach, inst, micro_imul_hi, TGSI_EXEC_DATA_INT);
5540 break;
5541
5542 case TGSI_OPCODE_UMUL_HI:
5543 exec_vector_binary(mach, inst, micro_umul_hi, TGSI_EXEC_DATA_UINT);
5544 break;
5545
5546 case TGSI_OPCODE_USEQ:
5547 exec_vector_binary(mach, inst, micro_useq, TGSI_EXEC_DATA_UINT);
5548 break;
5549
5550 case TGSI_OPCODE_USGE:
5551 exec_vector_binary(mach, inst, micro_usge, TGSI_EXEC_DATA_UINT);
5552 break;
5553
5554 case TGSI_OPCODE_USHR:
5555 exec_vector_binary(mach, inst, micro_ushr, TGSI_EXEC_DATA_UINT);
5556 break;
5557
5558 case TGSI_OPCODE_USLT:
5559 exec_vector_binary(mach, inst, micro_uslt, TGSI_EXEC_DATA_UINT);
5560 break;
5561
5562 case TGSI_OPCODE_USNE:
5563 exec_vector_binary(mach, inst, micro_usne, TGSI_EXEC_DATA_UINT);
5564 break;
5565
5566 case TGSI_OPCODE_SWITCH:
5567 exec_switch(mach, inst);
5568 break;
5569
5570 case TGSI_OPCODE_CASE:
5571 exec_case(mach, inst);
5572 break;
5573
5574 case TGSI_OPCODE_DEFAULT:
5575 exec_default(mach);
5576 break;
5577
5578 case TGSI_OPCODE_ENDSWITCH:
5579 exec_endswitch(mach);
5580 break;
5581
5582 case TGSI_OPCODE_SAMPLE_I:
5583 exec_txf(mach, inst);
5584 break;
5585
5586 case TGSI_OPCODE_SAMPLE_I_MS:
5587 exec_txf(mach, inst);
5588 break;
5589
5590 case TGSI_OPCODE_SAMPLE:
5591 exec_sample(mach, inst, TEX_MODIFIER_NONE, false);
5592 break;
5593
5594 case TGSI_OPCODE_SAMPLE_B:
5595 exec_sample(mach, inst, TEX_MODIFIER_LOD_BIAS, false);
5596 break;
5597
5598 case TGSI_OPCODE_SAMPLE_C:
5599 exec_sample(mach, inst, TEX_MODIFIER_NONE, true);
5600 break;
5601
5602 case TGSI_OPCODE_SAMPLE_C_LZ:
5603 exec_sample(mach, inst, TEX_MODIFIER_LEVEL_ZERO, true);
5604 break;
5605
5606 case TGSI_OPCODE_SAMPLE_D:
5607 exec_sample_d(mach, inst);
5608 break;
5609
5610 case TGSI_OPCODE_SAMPLE_L:
5611 exec_sample(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, false);
5612 break;
5613
5614 case TGSI_OPCODE_GATHER4:
5615 exec_sample(mach, inst, TEX_MODIFIER_GATHER, false);
5616 break;
5617
5618 case TGSI_OPCODE_SVIEWINFO:
5619 exec_txq(mach, inst);
5620 break;
5621
5622 case TGSI_OPCODE_SAMPLE_POS:
5623 assert(0);
5624 break;
5625
5626 case TGSI_OPCODE_SAMPLE_INFO:
5627 assert(0);
5628 break;
5629
5630 case TGSI_OPCODE_LOD:
5631 exec_lodq(mach, inst);
5632 break;
5633
5634 case TGSI_OPCODE_UARL:
5635 exec_vector_unary(mach, inst, micro_uarl, TGSI_EXEC_DATA_UINT);
5636 break;
5637
5638 case TGSI_OPCODE_UCMP:
5639 exec_ucmp(mach, inst);
5640 break;
5641
5642 case TGSI_OPCODE_IABS:
5643 exec_vector_unary(mach, inst, micro_iabs, TGSI_EXEC_DATA_INT);
5644 break;
5645
5646 case TGSI_OPCODE_ISSG:
5647 exec_vector_unary(mach, inst, micro_isgn, TGSI_EXEC_DATA_INT);
5648 break;
5649
5650 case TGSI_OPCODE_TEX2:
5651 /* simple texture lookup */
5652 /* src[0] = texcoord */
5653 /* src[1] = compare */
5654 /* src[2] = sampler unit */
5655 exec_tex(mach, inst, TEX_MODIFIER_NONE, 2);
5656 break;
5657 case TGSI_OPCODE_TXB2:
5658 /* simple texture lookup */
5659 /* src[0] = texcoord */
5660 /* src[1] = bias */
5661 /* src[2] = sampler unit */
5662 exec_tex(mach, inst, TEX_MODIFIER_LOD_BIAS, 2);
5663 break;
5664 case TGSI_OPCODE_TXL2:
5665 /* simple texture lookup */
5666 /* src[0] = texcoord */
5667 /* src[1] = lod */
5668 /* src[2] = sampler unit */
5669 exec_tex(mach, inst, TEX_MODIFIER_EXPLICIT_LOD, 2);
5670 break;
5671
5672 case TGSI_OPCODE_IBFE:
5673 exec_vector_trinary(mach, inst, micro_ibfe, TGSI_EXEC_DATA_INT);
5674 break;
5675 case TGSI_OPCODE_UBFE:
5676 exec_vector_trinary(mach, inst, micro_ubfe, TGSI_EXEC_DATA_UINT);
5677 break;
5678 case TGSI_OPCODE_BFI:
5679 exec_vector_quaternary(mach, inst, micro_bfi, TGSI_EXEC_DATA_UINT);
5680 break;
5681 case TGSI_OPCODE_BREV:
5682 exec_vector_unary(mach, inst, micro_brev, TGSI_EXEC_DATA_UINT);
5683 break;
5684 case TGSI_OPCODE_POPC:
5685 exec_vector_unary(mach, inst, micro_popc, TGSI_EXEC_DATA_UINT);
5686 break;
5687 case TGSI_OPCODE_LSB:
5688 exec_vector_unary(mach, inst, micro_lsb, TGSI_EXEC_DATA_UINT);
5689 break;
5690 case TGSI_OPCODE_IMSB:
5691 exec_vector_unary(mach, inst, micro_imsb, TGSI_EXEC_DATA_INT);
5692 break;
5693 case TGSI_OPCODE_UMSB:
5694 exec_vector_unary(mach, inst, micro_umsb, TGSI_EXEC_DATA_UINT);
5695 break;
5696
5697 case TGSI_OPCODE_F2D:
5698 exec_t_2_64(mach, inst, micro_f2d, TGSI_EXEC_DATA_FLOAT);
5699 break;
5700
5701 case TGSI_OPCODE_D2F:
5702 exec_64_2_t(mach, inst, micro_d2f);
5703 break;
5704
5705 case TGSI_OPCODE_DABS:
5706 exec_double_unary(mach, inst, micro_dabs);
5707 break;
5708
5709 case TGSI_OPCODE_DNEG:
5710 exec_double_unary(mach, inst, micro_dneg);
5711 break;
5712
5713 case TGSI_OPCODE_DADD:
5714 exec_double_binary(mach, inst, micro_dadd, TGSI_EXEC_DATA_DOUBLE);
5715 break;
5716
5717 case TGSI_OPCODE_DDIV:
5718 exec_double_binary(mach, inst, micro_ddiv, TGSI_EXEC_DATA_DOUBLE);
5719 break;
5720
5721 case TGSI_OPCODE_DMUL:
5722 exec_double_binary(mach, inst, micro_dmul, TGSI_EXEC_DATA_DOUBLE);
5723 break;
5724
5725 case TGSI_OPCODE_DMAX:
5726 exec_double_binary(mach, inst, micro_dmax, TGSI_EXEC_DATA_DOUBLE);
5727 break;
5728
5729 case TGSI_OPCODE_DMIN:
5730 exec_double_binary(mach, inst, micro_dmin, TGSI_EXEC_DATA_DOUBLE);
5731 break;
5732
5733 case TGSI_OPCODE_DSLT:
5734 exec_double_binary(mach, inst, micro_dslt, TGSI_EXEC_DATA_UINT);
5735 break;
5736
5737 case TGSI_OPCODE_DSGE:
5738 exec_double_binary(mach, inst, micro_dsge, TGSI_EXEC_DATA_UINT);
5739 break;
5740
5741 case TGSI_OPCODE_DSEQ:
5742 exec_double_binary(mach, inst, micro_dseq, TGSI_EXEC_DATA_UINT);
5743 break;
5744
5745 case TGSI_OPCODE_DSNE:
5746 exec_double_binary(mach, inst, micro_dsne, TGSI_EXEC_DATA_UINT);
5747 break;
5748
5749 case TGSI_OPCODE_DRCP:
5750 exec_double_unary(mach, inst, micro_drcp);
5751 break;
5752
5753 case TGSI_OPCODE_DSQRT:
5754 exec_double_unary(mach, inst, micro_dsqrt);
5755 break;
5756
5757 case TGSI_OPCODE_DRSQ:
5758 exec_double_unary(mach, inst, micro_drsq);
5759 break;
5760
5761 case TGSI_OPCODE_DMAD:
5762 exec_double_trinary(mach, inst, micro_dmad);
5763 break;
5764
5765 case TGSI_OPCODE_DFRAC:
5766 exec_double_unary(mach, inst, micro_dfrac);
5767 break;
5768
5769 case TGSI_OPCODE_DFLR:
5770 exec_double_unary(mach, inst, micro_dflr);
5771 break;
5772
5773 case TGSI_OPCODE_DLDEXP:
5774 exec_dldexp(mach, inst);
5775 break;
5776
5777 case TGSI_OPCODE_I2D:
5778 exec_t_2_64(mach, inst, micro_i2d, TGSI_EXEC_DATA_FLOAT);
5779 break;
5780
5781 case TGSI_OPCODE_D2I:
5782 exec_64_2_t(mach, inst, micro_d2i);
5783 break;
5784
5785 case TGSI_OPCODE_U2D:
5786 exec_t_2_64(mach, inst, micro_u2d, TGSI_EXEC_DATA_FLOAT);
5787 break;
5788
5789 case TGSI_OPCODE_D2U:
5790 exec_64_2_t(mach, inst, micro_d2u);
5791 break;
5792
5793 case TGSI_OPCODE_LOAD:
5794 exec_load(mach, inst);
5795 break;
5796
5797 case TGSI_OPCODE_STORE:
5798 exec_store(mach, inst);
5799 break;
5800
5801 case TGSI_OPCODE_ATOMUADD:
5802 case TGSI_OPCODE_ATOMXCHG:
5803 case TGSI_OPCODE_ATOMCAS:
5804 case TGSI_OPCODE_ATOMAND:
5805 case TGSI_OPCODE_ATOMOR:
5806 case TGSI_OPCODE_ATOMXOR:
5807 case TGSI_OPCODE_ATOMUMIN:
5808 case TGSI_OPCODE_ATOMUMAX:
5809 case TGSI_OPCODE_ATOMIMIN:
5810 case TGSI_OPCODE_ATOMIMAX:
5811 case TGSI_OPCODE_ATOMFADD:
5812 exec_atomop(mach, inst);
5813 break;
5814
5815 case TGSI_OPCODE_RESQ:
5816 exec_resq(mach, inst);
5817 break;
5818 case TGSI_OPCODE_BARRIER:
5819 case TGSI_OPCODE_MEMBAR:
5820 return true;
5821 break;
5822
5823 case TGSI_OPCODE_I64ABS:
5824 exec_double_unary(mach, inst, micro_i64abs);
5825 break;
5826
5827 case TGSI_OPCODE_I64SSG:
5828 exec_double_unary(mach, inst, micro_i64sgn);
5829 break;
5830
5831 case TGSI_OPCODE_I64NEG:
5832 exec_double_unary(mach, inst, micro_i64neg);
5833 break;
5834
5835 case TGSI_OPCODE_U64SEQ:
5836 exec_double_binary(mach, inst, micro_u64seq, TGSI_EXEC_DATA_UINT);
5837 break;
5838
5839 case TGSI_OPCODE_U64SNE:
5840 exec_double_binary(mach, inst, micro_u64sne, TGSI_EXEC_DATA_UINT);
5841 break;
5842
5843 case TGSI_OPCODE_I64SLT:
5844 exec_double_binary(mach, inst, micro_i64slt, TGSI_EXEC_DATA_UINT);
5845 break;
5846 case TGSI_OPCODE_U64SLT:
5847 exec_double_binary(mach, inst, micro_u64slt, TGSI_EXEC_DATA_UINT);
5848 break;
5849
5850 case TGSI_OPCODE_I64SGE:
5851 exec_double_binary(mach, inst, micro_i64sge, TGSI_EXEC_DATA_UINT);
5852 break;
5853 case TGSI_OPCODE_U64SGE:
5854 exec_double_binary(mach, inst, micro_u64sge, TGSI_EXEC_DATA_UINT);
5855 break;
5856
5857 case TGSI_OPCODE_I64MIN:
5858 exec_double_binary(mach, inst, micro_i64min, TGSI_EXEC_DATA_INT64);
5859 break;
5860 case TGSI_OPCODE_U64MIN:
5861 exec_double_binary(mach, inst, micro_u64min, TGSI_EXEC_DATA_UINT64);
5862 break;
5863 case TGSI_OPCODE_I64MAX:
5864 exec_double_binary(mach, inst, micro_i64max, TGSI_EXEC_DATA_INT64);
5865 break;
5866 case TGSI_OPCODE_U64MAX:
5867 exec_double_binary(mach, inst, micro_u64max, TGSI_EXEC_DATA_UINT64);
5868 break;
5869 case TGSI_OPCODE_U64ADD:
5870 exec_double_binary(mach, inst, micro_u64add, TGSI_EXEC_DATA_UINT64);
5871 break;
5872 case TGSI_OPCODE_U64MUL:
5873 exec_double_binary(mach, inst, micro_u64mul, TGSI_EXEC_DATA_UINT64);
5874 break;
5875 case TGSI_OPCODE_U64SHL:
5876 exec_arg0_64_arg1_32(mach, inst, micro_u64shl);
5877 break;
5878 case TGSI_OPCODE_I64SHR:
5879 exec_arg0_64_arg1_32(mach, inst, micro_i64shr);
5880 break;
5881 case TGSI_OPCODE_U64SHR:
5882 exec_arg0_64_arg1_32(mach, inst, micro_u64shr);
5883 break;
5884 case TGSI_OPCODE_U64DIV:
5885 exec_double_binary(mach, inst, micro_u64div, TGSI_EXEC_DATA_UINT64);
5886 break;
5887 case TGSI_OPCODE_I64DIV:
5888 exec_double_binary(mach, inst, micro_i64div, TGSI_EXEC_DATA_INT64);
5889 break;
5890 case TGSI_OPCODE_U64MOD:
5891 exec_double_binary(mach, inst, micro_u64mod, TGSI_EXEC_DATA_UINT64);
5892 break;
5893 case TGSI_OPCODE_I64MOD:
5894 exec_double_binary(mach, inst, micro_i64mod, TGSI_EXEC_DATA_INT64);
5895 break;
5896
5897 case TGSI_OPCODE_F2U64:
5898 exec_t_2_64(mach, inst, micro_f2u64, TGSI_EXEC_DATA_FLOAT);
5899 break;
5900
5901 case TGSI_OPCODE_F2I64:
5902 exec_t_2_64(mach, inst, micro_f2i64, TGSI_EXEC_DATA_FLOAT);
5903 break;
5904
5905 case TGSI_OPCODE_U2I64:
5906 exec_t_2_64(mach, inst, micro_u2i64, TGSI_EXEC_DATA_INT);
5907 break;
5908 case TGSI_OPCODE_I2I64:
5909 exec_t_2_64(mach, inst, micro_i2i64, TGSI_EXEC_DATA_INT);
5910 break;
5911
5912 case TGSI_OPCODE_D2U64:
5913 exec_double_unary(mach, inst, micro_d2u64);
5914 break;
5915
5916 case TGSI_OPCODE_D2I64:
5917 exec_double_unary(mach, inst, micro_d2i64);
5918 break;
5919
5920 case TGSI_OPCODE_U642F:
5921 exec_64_2_t(mach, inst, micro_u642f);
5922 break;
5923 case TGSI_OPCODE_I642F:
5924 exec_64_2_t(mach, inst, micro_i642f);
5925 break;
5926
5927 case TGSI_OPCODE_U642D:
5928 exec_double_unary(mach, inst, micro_u642d);
5929 break;
5930 case TGSI_OPCODE_I642D:
5931 exec_double_unary(mach, inst, micro_i642d);
5932 break;
5933 case TGSI_OPCODE_INTERP_SAMPLE:
5934 exec_interp_at_sample(mach, inst);
5935 break;
5936 case TGSI_OPCODE_INTERP_OFFSET:
5937 exec_interp_at_offset(mach, inst);
5938 break;
5939 case TGSI_OPCODE_INTERP_CENTROID:
5940 exec_interp_at_centroid(mach, inst);
5941 break;
5942 default:
5943 assert( 0 );
5944 }
5945 return false;
5946 }
5947
5948 static void
tgsi_exec_machine_setup_masks(struct tgsi_exec_machine * mach)5949 tgsi_exec_machine_setup_masks(struct tgsi_exec_machine *mach)
5950 {
5951 unsigned default_mask = 0xf;
5952
5953 mach->KillMask = 0;
5954 mach->OutputVertexOffset = 0;
5955
5956 if (mach->ShaderType == PIPE_SHADER_GEOMETRY) {
5957 for (unsigned i = 0; i < TGSI_MAX_VERTEX_STREAMS; i++) {
5958 mach->OutputPrimCount[i] = 0;
5959 mach->Primitives[i][0] = 0;
5960 }
5961 /* GS runs on a single primitive for now */
5962 default_mask = 0x1;
5963 }
5964
5965 if (mach->NonHelperMask == 0)
5966 mach->NonHelperMask = default_mask;
5967 mach->CondMask = default_mask;
5968 mach->LoopMask = default_mask;
5969 mach->ContMask = default_mask;
5970 mach->FuncMask = default_mask;
5971 mach->ExecMask = default_mask;
5972
5973 mach->Switch.mask = default_mask;
5974
5975 assert(mach->CondStackTop == 0);
5976 assert(mach->LoopStackTop == 0);
5977 assert(mach->ContStackTop == 0);
5978 assert(mach->SwitchStackTop == 0);
5979 assert(mach->BreakStackTop == 0);
5980 assert(mach->CallStackTop == 0);
5981 }
5982
5983 /**
5984 * Run TGSI interpreter.
5985 * \return bitmask of "alive" quad components
5986 */
5987 uint
tgsi_exec_machine_run(struct tgsi_exec_machine * mach,int start_pc)5988 tgsi_exec_machine_run( struct tgsi_exec_machine *mach, int start_pc )
5989 {
5990 unsigned i;
5991
5992 mach->pc = start_pc;
5993
5994 if (!start_pc) {
5995 tgsi_exec_machine_setup_masks(mach);
5996
5997 /* execute declarations (interpolants) */
5998 for (i = 0; i < mach->NumDeclarations; i++) {
5999 exec_declaration( mach, mach->Declarations+i );
6000 }
6001 }
6002
6003 {
6004 #if DEBUG_EXECUTION
6005 struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS];
6006 struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
6007 unsigned inst = 1;
6008
6009 if (!start_pc) {
6010 memset(mach->Temps, 0, sizeof(temps));
6011 if (mach->Outputs)
6012 memset(mach->Outputs, 0, sizeof(outputs));
6013 memset(temps, 0, sizeof(temps));
6014 memset(outputs, 0, sizeof(outputs));
6015 }
6016 #endif
6017
6018 /* execute instructions, until pc is set to -1 */
6019 while (mach->pc != -1) {
6020 bool barrier_hit;
6021 #if DEBUG_EXECUTION
6022 unsigned i;
6023
6024 tgsi_dump_instruction(&mach->Instructions[mach->pc], inst++);
6025 #endif
6026
6027 assert(mach->pc < (int) mach->NumInstructions);
6028 barrier_hit = exec_instruction(mach, mach->Instructions + mach->pc, &mach->pc);
6029
6030 /* for compute shaders if we hit a barrier return now for later rescheduling */
6031 if (barrier_hit && mach->ShaderType == PIPE_SHADER_COMPUTE)
6032 return 0;
6033
6034 #if DEBUG_EXECUTION
6035 for (i = 0; i < TGSI_EXEC_NUM_TEMPS; i++) {
6036 if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
6037 unsigned j;
6038
6039 memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
6040 debug_printf("TEMP[%2u] = ", i);
6041 for (j = 0; j < 4; j++) {
6042 if (j > 0) {
6043 debug_printf(" ");
6044 }
6045 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6046 temps[i].xyzw[0].f[j], temps[i].xyzw[0].u[j],
6047 temps[i].xyzw[1].f[j], temps[i].xyzw[1].u[j],
6048 temps[i].xyzw[2].f[j], temps[i].xyzw[2].u[j],
6049 temps[i].xyzw[3].f[j], temps[i].xyzw[3].u[j]);
6050 }
6051 }
6052 }
6053 if (mach->Outputs) {
6054 for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
6055 if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
6056 unsigned j;
6057
6058 memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
6059 debug_printf("OUT[%2u] = ", i);
6060 for (j = 0; j < 4; j++) {
6061 if (j > 0) {
6062 debug_printf(" ");
6063 }
6064 debug_printf("(%6f %u, %6f %u, %6f %u, %6f %u)\n",
6065 outputs[i].xyzw[0].f[j], outputs[i].xyzw[0].u[j],
6066 outputs[i].xyzw[1].f[j], outputs[i].xyzw[1].u[j],
6067 outputs[i].xyzw[2].f[j], outputs[i].xyzw[2].u[j],
6068 outputs[i].xyzw[3].f[j], outputs[i].xyzw[3].u[j]);
6069 }
6070 }
6071 }
6072 }
6073 #endif
6074 }
6075 }
6076
6077 #if 0
6078 /* we scale from floats in [0,1] to Zbuffer ints in sp_quad_depth_test.c */
6079 if (mach->ShaderType == PIPE_SHADER_FRAGMENT) {
6080 /*
6081 * Scale back depth component.
6082 */
6083 for (i = 0; i < 4; i++)
6084 mach->Outputs[0].xyzw[2].f[i] *= ctx->DrawBuffer->_DepthMaxF;
6085 }
6086 #endif
6087
6088 /* Strictly speaking, these assertions aren't really needed but they
6089 * can potentially catch some bugs in the control flow code.
6090 */
6091 assert(mach->CondStackTop == 0);
6092 assert(mach->LoopStackTop == 0);
6093 assert(mach->ContStackTop == 0);
6094 assert(mach->SwitchStackTop == 0);
6095 assert(mach->BreakStackTop == 0);
6096 assert(mach->CallStackTop == 0);
6097
6098 return ~mach->KillMask;
6099 }
6100