1 /*
2 * Copyright © 2015 Intel Corporation
3 * Copyright © 2014-2015 Broadcom
4 * Copyright (C) 2014 Rob Clark <[email protected]>
5 *
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the "Software"),
8 * to deal in the Software without restriction, including without limitation
9 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10 * and/or sell copies of the Software, and to permit persons to whom the
11 * Software is furnished to do so, subject to the following conditions:
12 *
13 * The above copyright notice and this permission notice (including the next
14 * paragraph) shall be included in all copies or substantial portions of the
15 * Software.
16 *
17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23 * IN THE SOFTWARE.
24 */
25
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29
30 #include "main/mtypes.h"
31 #include "main/shader_types.h"
32 #include "util/ralloc.h"
33
34 #include "prog_to_nir.h"
35 #include "prog_instruction.h"
36 #include "prog_parameter.h"
37 #include "prog_print.h"
38 #include "program.h"
39 #include "state_tracker/st_nir.h"
40
41 /**
42 * \file prog_to_nir.c
43 *
44 * A translator from Mesa IR (prog_instruction.h) to NIR. This is primarily
45 * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
46 * vertex processing. Full GLSL support should use glsl_to_nir instead.
47 */
48
49 struct ptn_compile {
50 const struct gl_context *ctx;
51 const struct gl_program *prog;
52 nir_builder build;
53 bool error;
54
55 nir_variable *parameters;
56 nir_variable *input_vars[VARYING_SLOT_MAX];
57 nir_variable *output_vars[VARYING_SLOT_MAX];
58 nir_variable *sysval_vars[SYSTEM_VALUE_MAX];
59 nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
60 nir_def **output_regs;
61 nir_def **temp_regs;
62
63 nir_def *addr_reg;
64 };
65
/* Builds an unsigned[4] compound literal of SWIZZLE_* values from four
 * component letters, e.g. SWIZ(Y, Z, X, W).
 */
#define SWIZ(X, Y, Z, W)                                                   \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }

/* Extracts the channel named by the letter ch (X, Y, Z or W) from src. */
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)
69
70 static nir_def *
ptn_get_src(struct ptn_compile * c,const struct prog_src_register * prog_src)71 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
72 {
73 nir_builder *b = &c->build;
74 nir_alu_src src;
75
76 memset(&src, 0, sizeof(src));
77
78 switch (prog_src->File) {
79 case PROGRAM_UNDEFINED:
80 return nir_imm_float(b, 0.0);
81 case PROGRAM_TEMPORARY:
82 assert(!prog_src->RelAddr && prog_src->Index >= 0);
83 src.src = nir_src_for_ssa(nir_load_reg(b, c->temp_regs[prog_src->Index]));
84 break;
85 case PROGRAM_INPUT: {
86 /* ARB_vertex_program doesn't allow relative addressing on vertex
87 * attributes; ARB_fragment_program has no relative addressing at all.
88 */
89 assert(!prog_src->RelAddr);
90
91 assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
92
93 nir_variable *var = c->input_vars[prog_src->Index];
94 src.src = nir_src_for_ssa(nir_load_var(b, var));
95 break;
96 }
97 case PROGRAM_SYSTEM_VALUE: {
98 assert(!prog_src->RelAddr);
99
100 assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
101
102 nir_variable *var = c->sysval_vars[prog_src->Index];
103 src.src = nir_src_for_ssa(nir_load_var(b, var));
104 break;
105 }
106 case PROGRAM_STATE_VAR:
107 case PROGRAM_CONSTANT: {
108 /* We actually want to look at the type in the Parameters list for this,
109 * because it lets us upload constant builtin uniforms as actual
110 * constants.
111 */
112 struct gl_program_parameter_list *plist = c->prog->Parameters;
113 gl_register_file file = prog_src->RelAddr ? prog_src->File :
114 plist->Parameters[prog_src->Index].Type;
115
116 switch (file) {
117 case PROGRAM_CONSTANT:
118 if ((c->prog->arb.IndirectRegisterFiles &
119 (1 << PROGRAM_CONSTANT)) == 0) {
120 unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
121 float *v = (float *) plist->ParameterValues + pvo;
122 src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
123 break;
124 }
125 FALLTHROUGH;
126 case PROGRAM_STATE_VAR: {
127 assert(c->parameters != NULL);
128
129 nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
130
131 nir_def *index = nir_imm_int(b, prog_src->Index);
132
133 /* Add the address register. Note this is (uniquely) a scalar, so the
134 * component sizes match.
135 */
136 if (prog_src->RelAddr)
137 index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
138
139 deref = nir_build_deref_array(b, deref, index);
140 src.src = nir_src_for_ssa(nir_load_deref(b, deref));
141 break;
142 }
143 default:
144 fprintf(stderr, "bad uniform src register file: %s (%d)\n",
145 _mesa_register_file_name(file), file);
146 abort();
147 }
148 break;
149 }
150 default:
151 fprintf(stderr, "unknown src register file: %s (%d)\n",
152 _mesa_register_file_name(prog_src->File), prog_src->File);
153 abort();
154 }
155
156 nir_def *def;
157 if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
158 (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
159 /* The simple non-SWZ case. */
160 for (int i = 0; i < 4; i++)
161 src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
162
163 def = nir_mov_alu(b, src, 4);
164
165 if (prog_src->Negate)
166 def = nir_fneg(b, def);
167 } else {
168 /* The SWZ instruction allows per-component zero/one swizzles, and also
169 * per-component negation.
170 */
171 nir_def *chans[4];
172 for (int i = 0; i < 4; i++) {
173 int swizzle = GET_SWZ(prog_src->Swizzle, i);
174 if (swizzle == SWIZZLE_ZERO) {
175 chans[i] = nir_imm_float(b, 0.0);
176 } else if (swizzle == SWIZZLE_ONE) {
177 chans[i] = nir_imm_float(b, 1.0);
178 } else {
179 assert(swizzle != SWIZZLE_NIL);
180 nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
181 nir_def_init(&mov->instr, &mov->def, 1, 32);
182 mov->src[0] = src;
183 mov->src[0].swizzle[0] = swizzle;
184 nir_builder_instr_insert(b, &mov->instr);
185
186 chans[i] = &mov->def;
187 }
188
189 if (prog_src->Negate & (1 << i))
190 chans[i] = nir_fneg(b, chans[i]);
191 }
192 def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
193 }
194
195 return def;
196 }
197
198 /* EXP - Approximate Exponential Base 2
199 * dst.x = 2^{\lfloor src.x\rfloor}
200 * dst.y = src.x - \lfloor src.x\rfloor
201 * dst.z = 2^{src.x}
202 * dst.w = 1.0
203 */
204 static nir_def *
ptn_exp(nir_builder * b,nir_def ** src)205 ptn_exp(nir_builder *b, nir_def **src)
206 {
207 nir_def *srcx = ptn_channel(b, src[0], X);
208
209 return nir_vec4(b, nir_fexp2(b, nir_ffloor(b, srcx)),
210 nir_fsub(b, srcx, nir_ffloor(b, srcx)),
211 nir_fexp2(b, srcx),
212 nir_imm_float(b, 1.0));
213 }
214
215 /* LOG - Approximate Logarithm Base 2
216 * dst.x = \lfloor\log_2{|src.x|}\rfloor
217 * dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
218 * dst.z = \log_2{|src.x|}
219 * dst.w = 1.0
220 */
221 static nir_def *
ptn_log(nir_builder * b,nir_def ** src)222 ptn_log(nir_builder *b, nir_def **src)
223 {
224 nir_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
225 nir_def *log2 = nir_flog2(b, abs_srcx);
226
227 return nir_vec4(b, nir_ffloor(b, log2),
228 nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
229 nir_flog2(b, abs_srcx),
230 nir_imm_float(b, 1.0));
231 }
232
233 /* DST - Distance Vector
234 * dst.x = 1.0
235 * dst.y = src0.y \times src1.y
236 * dst.z = src0.z
237 * dst.w = src1.w
238 */
239 static nir_def *
ptn_dst(nir_builder * b,nir_def ** src)240 ptn_dst(nir_builder *b, nir_def **src)
241 {
242 return nir_vec4(b, nir_imm_float(b, 1.0),
243 nir_fmul(b, ptn_channel(b, src[0], Y),
244 ptn_channel(b, src[1], Y)),
245 ptn_channel(b, src[0], Z),
246 ptn_channel(b, src[1], W));
247 }
248
249 /* LIT - Light Coefficients
250 * dst.x = 1.0
251 * dst.y = max(src.x, 0.0)
252 * dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
253 * dst.w = 1.0
254 */
255 static nir_def *
ptn_lit(nir_builder * b,nir_def ** src)256 ptn_lit(nir_builder *b, nir_def **src)
257 {
258 nir_def *src0_y = ptn_channel(b, src[0], Y);
259 nir_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
260 nir_imm_float(b, 128.0)),
261 nir_imm_float(b, -128.0));
262 nir_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
263 wclamp);
264
265 nir_def *z = nir_bcsel(b, nir_fle_imm(b, ptn_channel(b, src[0], X), 0.0),
266 nir_imm_float(b, 0.0), pow);
267
268 return nir_vec4(b, nir_imm_float(b, 1.0),
269 nir_fmax(b, ptn_channel(b, src[0], X),
270 nir_imm_float(b, 0.0)),
271 z,
272 nir_imm_float(b, 1.0));
273 }
274
275 /* SCS - Sine Cosine
276 * dst.x = \cos{src.x}
277 * dst.y = \sin{src.x}
278 * dst.z = 0.0
279 * dst.w = 1.0
280 */
281 static nir_def *
ptn_scs(nir_builder * b,nir_def ** src)282 ptn_scs(nir_builder *b, nir_def **src)
283 {
284 return nir_vec4(b, nir_fcos(b, ptn_channel(b, src[0], X)),
285 nir_fsin(b, ptn_channel(b, src[0], X)),
286 nir_imm_float(b, 0.0),
287 nir_imm_float(b, 1.0));
288 }
289
290 static nir_def *
ptn_xpd(nir_builder * b,nir_def ** src)291 ptn_xpd(nir_builder *b, nir_def **src)
292 {
293 nir_def *vec =
294 nir_fsub(b, nir_fmul(b, nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
295 nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
296 nir_fmul(b, nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
297 nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3)));
298
299 return nir_vec4(b, nir_channel(b, vec, 0),
300 nir_channel(b, vec, 1),
301 nir_channel(b, vec, 2),
302 nir_imm_float(b, 1.0));
303 }
304
305 static void
ptn_kil(nir_builder * b,nir_def ** src)306 ptn_kil(nir_builder *b, nir_def **src)
307 {
308 /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
309 b->exact = true;
310 nir_def *cmp = nir_bany(b, nir_flt_imm(b, src[0], 0.0));
311 b->exact = false;
312
313 nir_discard_if(b, cmp);
314 }
315
316 enum glsl_sampler_dim
_mesa_texture_index_to_sampler_dim(gl_texture_index index,bool * is_array)317 _mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
318 {
319 *is_array = false;
320
321 switch (index) {
322 case TEXTURE_2D_MULTISAMPLE_INDEX:
323 return GLSL_SAMPLER_DIM_MS;
324 case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
325 *is_array = true;
326 return GLSL_SAMPLER_DIM_MS;
327 case TEXTURE_BUFFER_INDEX:
328 return GLSL_SAMPLER_DIM_BUF;
329 case TEXTURE_1D_INDEX:
330 return GLSL_SAMPLER_DIM_1D;
331 case TEXTURE_2D_INDEX:
332 return GLSL_SAMPLER_DIM_2D;
333 case TEXTURE_3D_INDEX:
334 return GLSL_SAMPLER_DIM_3D;
335 case TEXTURE_CUBE_INDEX:
336 return GLSL_SAMPLER_DIM_CUBE;
337 case TEXTURE_CUBE_ARRAY_INDEX:
338 *is_array = true;
339 return GLSL_SAMPLER_DIM_CUBE;
340 case TEXTURE_RECT_INDEX:
341 return GLSL_SAMPLER_DIM_RECT;
342 case TEXTURE_1D_ARRAY_INDEX:
343 *is_array = true;
344 return GLSL_SAMPLER_DIM_1D;
345 case TEXTURE_2D_ARRAY_INDEX:
346 *is_array = true;
347 return GLSL_SAMPLER_DIM_2D;
348 case TEXTURE_EXTERNAL_INDEX:
349 return GLSL_SAMPLER_DIM_EXTERNAL;
350 case NUM_TEXTURE_TARGETS:
351 break;
352 }
353 unreachable("unknown texture target");
354 }
355
356 static nir_def *
ptn_tex(struct ptn_compile * c,nir_def ** src,struct prog_instruction * prog_inst)357 ptn_tex(struct ptn_compile *c, nir_def **src,
358 struct prog_instruction *prog_inst)
359 {
360 nir_builder *b = &c->build;
361 nir_tex_instr *instr;
362 nir_texop op;
363 unsigned num_srcs;
364
365 switch (prog_inst->Opcode) {
366 case OPCODE_TEX:
367 op = nir_texop_tex;
368 num_srcs = 1;
369 break;
370 case OPCODE_TXB:
371 op = nir_texop_txb;
372 num_srcs = 2;
373 break;
374 case OPCODE_TXD:
375 op = nir_texop_txd;
376 num_srcs = 3;
377 break;
378 case OPCODE_TXL:
379 op = nir_texop_txl;
380 num_srcs = 2;
381 break;
382 case OPCODE_TXP:
383 op = nir_texop_tex;
384 num_srcs = 2;
385 break;
386 default:
387 fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
388 abort();
389 }
390
391 /* Deref sources */
392 num_srcs += 2;
393
394 if (prog_inst->TexShadow)
395 num_srcs++;
396
397 instr = nir_tex_instr_create(b->shader, num_srcs);
398 instr->op = op;
399 instr->dest_type = nir_type_float32;
400 instr->is_shadow = prog_inst->TexShadow;
401
402 bool is_array;
403 instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);
404
405 instr->coord_components =
406 glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
407
408 nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
409 if (!var) {
410 const struct glsl_type *type =
411 glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
412 char samplerName[20];
413 snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
414 var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
415 var->data.binding = prog_inst->TexSrcUnit;
416 var->data.explicit_binding = true;
417 c->sampler_vars[prog_inst->TexSrcUnit] = var;
418 }
419
420 nir_deref_instr *deref = nir_build_deref_var(b, var);
421
422 unsigned src_number = 0;
423
424 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
425 &deref->def);
426 src_number++;
427 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
428 &deref->def);
429 src_number++;
430
431 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_coord,
432 nir_trim_vector(b, src[0],
433 instr->coord_components));
434 src_number++;
435
436 if (prog_inst->Opcode == OPCODE_TXP) {
437 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
438 ptn_channel(b, src[0], W));
439 src_number++;
440 }
441
442 if (prog_inst->Opcode == OPCODE_TXB) {
443 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
444 ptn_channel(b, src[0], W));
445 src_number++;
446 }
447
448 if (prog_inst->Opcode == OPCODE_TXL) {
449 instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
450 ptn_channel(b, src[0], W));
451 src_number++;
452 }
453
454 if (instr->is_shadow) {
455 if (instr->coord_components < 3)
456 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
457 else
458 instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
459
460 instr->src[src_number].src_type = nir_tex_src_comparator;
461 src_number++;
462 }
463
464 assert(src_number == num_srcs);
465
466 nir_def_init(&instr->instr, &instr->def, 4, 32);
467 nir_builder_instr_insert(b, &instr->instr);
468
469 return &instr->def;
470 }
471
472 static const nir_op op_trans[MAX_OPCODE] = {
473 [OPCODE_NOP] = 0,
474 [OPCODE_ABS] = nir_op_fabs,
475 [OPCODE_ADD] = nir_op_fadd,
476 [OPCODE_ARL] = 0,
477 [OPCODE_CMP] = 0,
478 [OPCODE_COS] = 0,
479 [OPCODE_DDX] = nir_op_fddx,
480 [OPCODE_DDY] = nir_op_fddy,
481 [OPCODE_DP2] = 0,
482 [OPCODE_DP3] = 0,
483 [OPCODE_DP4] = 0,
484 [OPCODE_DPH] = 0,
485 [OPCODE_DST] = 0,
486 [OPCODE_END] = 0,
487 [OPCODE_EX2] = 0,
488 [OPCODE_EXP] = 0,
489 [OPCODE_FLR] = nir_op_ffloor,
490 [OPCODE_FRC] = nir_op_ffract,
491 [OPCODE_LG2] = 0,
492 [OPCODE_LIT] = 0,
493 [OPCODE_LOG] = 0,
494 [OPCODE_LRP] = 0,
495 [OPCODE_MAD] = 0,
496 [OPCODE_MAX] = nir_op_fmax,
497 [OPCODE_MIN] = nir_op_fmin,
498 [OPCODE_MOV] = nir_op_mov,
499 [OPCODE_MUL] = nir_op_fmul,
500 [OPCODE_POW] = 0,
501 [OPCODE_RCP] = 0,
502
503 [OPCODE_RSQ] = 0,
504 [OPCODE_SCS] = 0,
505 [OPCODE_SGE] = 0,
506 [OPCODE_SIN] = 0,
507 [OPCODE_SLT] = 0,
508 [OPCODE_SSG] = nir_op_fsign,
509 [OPCODE_SUB] = nir_op_fsub,
510 [OPCODE_SWZ] = 0,
511 [OPCODE_TEX] = 0,
512 [OPCODE_TXB] = 0,
513 [OPCODE_TXD] = 0,
514 [OPCODE_TXL] = 0,
515 [OPCODE_TXP] = 0,
516 [OPCODE_XPD] = 0,
517 };
518
519 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)520 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
521 {
522 nir_builder *b = &c->build;
523 unsigned i;
524 const unsigned op = prog_inst->Opcode;
525
526 if (op == OPCODE_END)
527 return;
528
529 nir_def *src[3];
530 for (i = 0; i < 3; i++) {
531 src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
532 }
533
534 nir_def *dst = NULL;
535 if (c->error)
536 return;
537
538 switch (op) {
539 case OPCODE_RSQ:
540 dst = nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)));
541 break;
542
543 case OPCODE_RCP:
544 dst = nir_frcp(b, ptn_channel(b, src[0], X));
545 break;
546
547 case OPCODE_EX2:
548 dst = nir_fexp2(b, ptn_channel(b, src[0], X));
549 break;
550
551 case OPCODE_LG2:
552 dst = nir_flog2(b, ptn_channel(b, src[0], X));
553 break;
554
555 case OPCODE_POW:
556 dst = nir_fpow(b, ptn_channel(b, src[0], X), ptn_channel(b, src[1], X));
557 break;
558
559 case OPCODE_COS:
560 dst = nir_fcos(b, ptn_channel(b, src[0], X));
561 break;
562
563 case OPCODE_SIN:
564 dst = nir_fsin(b, ptn_channel(b, src[0], X));
565 break;
566
567 case OPCODE_ARL:
568 dst = nir_f2i32(b, nir_ffloor(b, src[0]));
569 break;
570
571 case OPCODE_EXP:
572 dst = ptn_exp(b, src);
573 break;
574
575 case OPCODE_LOG:
576 dst = ptn_log(b, src);
577 break;
578
579 case OPCODE_LRP:
580 dst = nir_flrp(b, src[2], src[1], src[0]);
581 break;
582
583 case OPCODE_MAD:
584 dst = nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]);
585 break;
586
587 case OPCODE_DST:
588 dst = ptn_dst(b, src);
589 break;
590
591 case OPCODE_LIT:
592 dst = ptn_lit(b, src);
593 break;
594
595 case OPCODE_XPD:
596 dst = ptn_xpd(b, src);
597 break;
598
599 case OPCODE_DP2:
600 dst = nir_fdot2(b, src[0], src[1]);
601 break;
602
603 case OPCODE_DP3:
604 dst = nir_fdot3(b, src[0], src[1]);
605 break;
606
607 case OPCODE_DP4:
608 dst = nir_fdot4(b, src[0], src[1]);
609 break;
610
611 case OPCODE_DPH:
612 dst = nir_fdph(b, src[0], src[1]);
613 break;
614
615 case OPCODE_KIL:
616 ptn_kil(b, src);
617 break;
618
619 case OPCODE_CMP:
620 dst = nir_bcsel(b, nir_flt_imm(b, src[0], 0.0), src[1], src[2]);
621 break;
622
623 case OPCODE_SCS:
624 dst = ptn_scs(b, src);
625 break;
626
627 case OPCODE_SLT:
628 dst = nir_slt(b, src[0], src[1]);
629 break;
630
631 case OPCODE_SGE:
632 dst = nir_sge(b, src[0], src[1]);
633 break;
634
635 case OPCODE_TEX:
636 case OPCODE_TXB:
637 case OPCODE_TXD:
638 case OPCODE_TXL:
639 case OPCODE_TXP:
640 dst = ptn_tex(c, src, prog_inst);
641 break;
642
643 case OPCODE_SWZ:
644 /* Extended swizzles were already handled in ptn_get_src(). */
645 dst = nir_build_alu_src_arr(b, nir_op_mov, src);
646 break;
647
648 case OPCODE_NOP:
649 break;
650
651 default:
652 if (op_trans[op] != 0) {
653 dst = nir_build_alu_src_arr(b, op_trans[op], src);
654 } else {
655 fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
656 abort();
657 }
658 break;
659 }
660
661 if (dst == NULL)
662 return;
663
664 if (dst->num_components == 1)
665 dst = nir_replicate(b, dst, 4);
666
667 assert(dst->num_components == 4);
668
669 if (prog_inst->Saturate)
670 dst = nir_fsat(b, dst);
671
672 const struct prog_dst_register *prog_dst = &prog_inst->DstReg;
673 assert(!prog_dst->RelAddr);
674
675 nir_def *reg = NULL;
676 unsigned write_mask = prog_dst->WriteMask;
677
678 switch (prog_dst->File) {
679 case PROGRAM_TEMPORARY:
680 reg = c->temp_regs[prog_dst->Index];
681 break;
682 case PROGRAM_OUTPUT:
683 reg = c->output_regs[prog_dst->Index];
684 break;
685 case PROGRAM_ADDRESS:
686 assert(prog_dst->Index == 0);
687 reg = c->addr_reg;
688
689 /* The address register (uniquely) is scalar. */
690 dst = nir_channel(b, dst, 0);
691 write_mask &= 1;
692 break;
693 case PROGRAM_UNDEFINED:
694 return;
695 }
696
697 /* In case there was some silly .y write to the scalar address reg */
698 if (write_mask == 0)
699 return;
700
701 assert(reg != NULL);
702 nir_build_store_reg(b, dst, reg, .write_mask = write_mask);
703 }
704
705 /**
706 * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
707 * variables at the end of the shader.
708 *
709 * We don't generate these incrementally as the PROGRAM_OUTPUT values are
710 * written, because there's no output load intrinsic, which means we couldn't
711 * handle writemasks.
712 */
713 static void
ptn_add_output_stores(struct ptn_compile * c)714 ptn_add_output_stores(struct ptn_compile *c)
715 {
716 nir_builder *b = &c->build;
717
718 nir_foreach_shader_out_variable(var, b->shader) {
719 nir_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
720 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
721 var->data.location == FRAG_RESULT_DEPTH) {
722 /* result.depth has this strange convention of being the .z component of
723 * a vec4 with undefined .xyw components. We resolve it to a scalar, to
724 * match GLSL's gl_FragDepth and the expectations of most backends.
725 */
726 src = nir_channel(b, src, 2);
727 }
728 if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
729 (var->data.location == VARYING_SLOT_FOGC ||
730 var->data.location == VARYING_SLOT_PSIZ)) {
731 /* result.{fogcoord,psiz} is a single component value */
732 src = nir_channel(b, src, 0);
733 }
734 unsigned num_components = glsl_get_vector_elements(var->type);
735 nir_store_var(b, var, src, (1 << num_components) - 1);
736 }
737 }
738
739 static void
setup_registers_and_variables(struct ptn_compile * c)740 setup_registers_and_variables(struct ptn_compile *c)
741 {
742 nir_builder *b = &c->build;
743 struct nir_shader *shader = b->shader;
744
745 /* Create input variables. */
746 uint64_t inputs_read = c->prog->info.inputs_read;
747 while (inputs_read) {
748 const int i = u_bit_scan64(&inputs_read);
749
750 if (c->ctx->Const.GLSLFragCoordIsSysVal &&
751 shader->info.stage == MESA_SHADER_FRAGMENT &&
752 i == VARYING_SLOT_POS) {
753 c->input_vars[i] = nir_create_variable_with_location(shader, nir_var_system_value,
754 SYSTEM_VALUE_FRAG_COORD, glsl_vec4_type());
755 continue;
756 }
757
758 nir_variable *var =
759 nir_create_variable_with_location(shader, nir_var_shader_in,
760 i, glsl_vec4_type());
761
762 if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
763 if (i == VARYING_SLOT_FOGC) {
764 /* fogcoord is defined as <f, 0.0, 0.0, 1.0>. Make the actual
765 * input variable a float, and create a local containing the
766 * full vec4 value.
767 */
768 var->type = glsl_float_type();
769
770 nir_variable *fullvar =
771 nir_local_variable_create(b->impl, glsl_vec4_type(),
772 "fogcoord_tmp");
773
774 nir_store_var(b, fullvar,
775 nir_vec4(b, nir_load_var(b, var),
776 nir_imm_float(b, 0.0),
777 nir_imm_float(b, 0.0),
778 nir_imm_float(b, 1.0)),
779 WRITEMASK_XYZW);
780
781 /* We inserted the real input into the list so the driver has real
782 * inputs, but we set c->input_vars[i] to the temporary so we use
783 * the splatted value.
784 */
785 c->input_vars[i] = fullvar;
786 continue;
787 }
788 }
789
790 c->input_vars[i] = var;
791 }
792
793 /* Create system value variables */
794 int i;
795 BITSET_FOREACH_SET(i, c->prog->info.system_values_read, SYSTEM_VALUE_MAX) {
796 c->sysval_vars[i] = nir_create_variable_with_location(b->shader, nir_var_system_value,
797 i, glsl_vec4_type());
798 }
799
800 /* Create output registers and variables. */
801 int max_outputs = util_last_bit64(c->prog->info.outputs_written);
802 c->output_regs = rzalloc_array(c, nir_def *, max_outputs);
803
804 uint64_t outputs_written = c->prog->info.outputs_written;
805 while (outputs_written) {
806 const int i = u_bit_scan64(&outputs_written);
807
808 /* Since we can't load from outputs in the IR, we make temporaries
809 * for the outputs and emit stores to the real outputs at the end of
810 * the shader.
811 */
812 nir_def *reg = nir_decl_reg(b, 4, 32, 0);
813
814 const struct glsl_type *type;
815 if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
816 (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC) ||
817 (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_PSIZ))
818 type = glsl_float_type();
819 else
820 type = glsl_vec4_type();
821
822 nir_variable *var =
823 nir_variable_create(shader, nir_var_shader_out, type,
824 ralloc_asprintf(shader, "out_%d", i));
825 var->data.location = i;
826 var->data.index = 0;
827
828 c->output_regs[i] = reg;
829 c->output_vars[i] = var;
830 }
831
832 /* Create temporary registers. */
833 c->temp_regs = rzalloc_array(c, nir_def *,
834 c->prog->arb.NumTemporaries);
835
836 for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
837 c->temp_regs[i] = nir_decl_reg(b, 4, 32, 0);
838 }
839
840 /* Create the address register (for ARB_vertex_program). This is uniquely a
841 * scalar, requiring special handling for stores.
842 */
843 c->addr_reg = nir_decl_reg(b, 1, 32, 0);
844 }
845
846 struct nir_shader *
prog_to_nir(const struct gl_context * ctx,const struct gl_program * prog,const nir_shader_compiler_options * options)847 prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog,
848 const nir_shader_compiler_options *options)
849 {
850 struct ptn_compile *c;
851 struct nir_shader *s;
852 gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
853
854 c = rzalloc(NULL, struct ptn_compile);
855 if (!c)
856 return NULL;
857 c->prog = prog;
858 c->ctx = ctx;
859
860 c->build = nir_builder_init_simple_shader(stage, options, NULL);
861
862 /* Copy the shader_info from the gl_program */
863 c->build.shader->info = prog->info;
864
865 s = c->build.shader;
866
867 if (prog->Parameters->NumParameters > 0) {
868 const struct glsl_type *type =
869 glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
870 c->parameters =
871 nir_variable_create(s, nir_var_uniform, type,
872 prog->Parameters->Parameters[0].Name);
873 }
874
875 setup_registers_and_variables(c);
876 if (unlikely(c->error))
877 goto fail;
878
879 for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
880 ptn_emit_instruction(c, &prog->arb.Instructions[i]);
881
882 if (unlikely(c->error))
883 break;
884 }
885
886 ptn_add_output_stores(c);
887
888 s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
889 s->info.num_textures = util_last_bit(prog->SamplersUsed);
890 s->info.num_ubos = 0;
891 s->info.num_abos = 0;
892 s->info.num_ssbos = 0;
893 s->info.num_images = 0;
894 s->info.uses_texture_gather = false;
895 s->info.clip_distance_array_size = 0;
896 s->info.cull_distance_array_size = 0;
897 s->info.separate_shader = true;
898 s->info.io_lowered = false;
899 s->info.internal = false;
900
901 /* ARB_vp: */
902 if (prog->arb.IsPositionInvariant) {
903 NIR_PASS(_, s, st_nir_lower_position_invariant,
904 ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
905 prog->Parameters);
906 }
907
908 /* Add OPTION ARB_fog_exp code */
909 if (prog->arb.Fog)
910 NIR_PASS(_, s, st_nir_lower_fog, prog->arb.Fog, prog->Parameters);
911
912 fail:
913 if (c->error) {
914 ralloc_free(s);
915 s = NULL;
916 }
917 ralloc_free(c);
918 return s;
919 }
920