xref: /aosp_15_r20/external/mesa3d/src/mesa/program/prog_to_nir.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015 Intel Corporation
3  * Copyright © 2014-2015 Broadcom
4  * Copyright (C) 2014 Rob Clark <[email protected]>
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a
7  * copy of this software and associated documentation files (the "Software"),
8  * to deal in the Software without restriction, including without limitation
9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10  * and/or sell copies of the Software, and to permit persons to whom the
11  * Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice (including the next
14  * paragraph) shall be included in all copies or substantial portions of the
15  * Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
22  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
23  * IN THE SOFTWARE.
24  */
25 
26 #include "compiler/nir/nir.h"
27 #include "compiler/nir/nir_builder.h"
28 #include "compiler/glsl/list.h"
29 
30 #include "main/mtypes.h"
31 #include "main/shader_types.h"
32 #include "util/ralloc.h"
33 
34 #include "prog_to_nir.h"
35 #include "prog_instruction.h"
36 #include "prog_parameter.h"
37 #include "prog_print.h"
38 #include "program.h"
39 #include "state_tracker/st_nir.h"
40 
41 /**
42  * \file prog_to_nir.c
43  *
44  * A translator from Mesa IR (prog_instruction.h) to NIR.  This is primarily
45  * intended to support ARB_vertex_program, ARB_fragment_program, and fixed-function
46  * vertex processing.  Full GLSL support should use glsl_to_nir instead.
47  */
48 
/**
 * State carried through the translation of one gl_program into a NIR shader.
 */
struct ptn_compile {
   const struct gl_context *ctx;   /* context the program belongs to */
   const struct gl_program *prog;  /* Mesa IR program being translated */
   nir_builder build;              /* builder emitting into the new shader */
   bool error;                     /* tested by prog_to_nir() to discard the shader */

   /* Uniform vec4 array covering the program's parameter list.  Only created
    * when the program has at least one parameter.
    */
   nir_variable *parameters;
   nir_variable *input_vars[VARYING_SLOT_MAX];  /* indexed by PROGRAM_INPUT index */
   nir_variable *output_vars[VARYING_SLOT_MAX]; /* indexed by output location */
   nir_variable *sysval_vars[SYSTEM_VALUE_MAX]; /* indexed by system value */
   nir_variable *sampler_vars[32]; /* matches number of bits in TexSrcUnit */
   nir_def **output_regs;  /* per-output registers, stored to variables at the end */
   nir_def **temp_regs;    /* one register per PROGRAM_TEMPORARY */

   nir_def *addr_reg;      /* the single, scalar address register */
};
65 
/* Builds an unsigned[4] compound literal holding the SWIZZLE_* values for the
 * four named components, e.g. SWIZ(Y, Z, X, W).
 */
#define SWIZ(X, Y, Z, W) \
   (unsigned[4]){ SWIZZLE_##X, SWIZZLE_##Y, SWIZZLE_##Z, SWIZZLE_##W }
/* Extracts the component of src named by ch (X, Y, Z or W). */
#define ptn_channel(b, src, ch) nir_channel(b, src, SWIZZLE_##ch)
69 
70 static nir_def *
ptn_get_src(struct ptn_compile * c,const struct prog_src_register * prog_src)71 ptn_get_src(struct ptn_compile *c, const struct prog_src_register *prog_src)
72 {
73    nir_builder *b = &c->build;
74    nir_alu_src src;
75 
76    memset(&src, 0, sizeof(src));
77 
78    switch (prog_src->File) {
79    case PROGRAM_UNDEFINED:
80       return nir_imm_float(b, 0.0);
81    case PROGRAM_TEMPORARY:
82       assert(!prog_src->RelAddr && prog_src->Index >= 0);
83       src.src = nir_src_for_ssa(nir_load_reg(b, c->temp_regs[prog_src->Index]));
84       break;
85    case PROGRAM_INPUT: {
86       /* ARB_vertex_program doesn't allow relative addressing on vertex
87        * attributes; ARB_fragment_program has no relative addressing at all.
88        */
89       assert(!prog_src->RelAddr);
90 
91       assert(prog_src->Index >= 0 && prog_src->Index < VARYING_SLOT_MAX);
92 
93       nir_variable *var = c->input_vars[prog_src->Index];
94       src.src = nir_src_for_ssa(nir_load_var(b, var));
95       break;
96    }
97    case PROGRAM_SYSTEM_VALUE: {
98       assert(!prog_src->RelAddr);
99 
100       assert(prog_src->Index >= 0 && prog_src->Index < SYSTEM_VALUE_MAX);
101 
102       nir_variable *var = c->sysval_vars[prog_src->Index];
103       src.src = nir_src_for_ssa(nir_load_var(b, var));
104       break;
105    }
106    case PROGRAM_STATE_VAR:
107    case PROGRAM_CONSTANT: {
108       /* We actually want to look at the type in the Parameters list for this,
109        * because it lets us upload constant builtin uniforms as actual
110        * constants.
111        */
112       struct gl_program_parameter_list *plist = c->prog->Parameters;
113       gl_register_file file = prog_src->RelAddr ? prog_src->File :
114          plist->Parameters[prog_src->Index].Type;
115 
116       switch (file) {
117       case PROGRAM_CONSTANT:
118          if ((c->prog->arb.IndirectRegisterFiles &
119               (1 << PROGRAM_CONSTANT)) == 0) {
120             unsigned pvo = plist->Parameters[prog_src->Index].ValueOffset;
121             float *v = (float *) plist->ParameterValues + pvo;
122             src.src = nir_src_for_ssa(nir_imm_vec4(b, v[0], v[1], v[2], v[3]));
123             break;
124          }
125          FALLTHROUGH;
126       case PROGRAM_STATE_VAR: {
127          assert(c->parameters != NULL);
128 
129          nir_deref_instr *deref = nir_build_deref_var(b, c->parameters);
130 
131          nir_def *index = nir_imm_int(b, prog_src->Index);
132 
133          /* Add the address register. Note this is (uniquely) a scalar, so the
134           * component sizes match.
135           */
136          if (prog_src->RelAddr)
137             index = nir_iadd(b, index, nir_load_reg(b, c->addr_reg));
138 
139          deref = nir_build_deref_array(b, deref, index);
140          src.src = nir_src_for_ssa(nir_load_deref(b, deref));
141          break;
142       }
143       default:
144          fprintf(stderr, "bad uniform src register file: %s (%d)\n",
145                  _mesa_register_file_name(file), file);
146          abort();
147       }
148       break;
149    }
150    default:
151       fprintf(stderr, "unknown src register file: %s (%d)\n",
152               _mesa_register_file_name(prog_src->File), prog_src->File);
153       abort();
154    }
155 
156    nir_def *def;
157    if (!HAS_EXTENDED_SWIZZLE(prog_src->Swizzle) &&
158        (prog_src->Negate == NEGATE_NONE || prog_src->Negate == NEGATE_XYZW)) {
159       /* The simple non-SWZ case. */
160       for (int i = 0; i < 4; i++)
161          src.swizzle[i] = GET_SWZ(prog_src->Swizzle, i);
162 
163       def = nir_mov_alu(b, src, 4);
164 
165       if (prog_src->Negate)
166          def = nir_fneg(b, def);
167    } else {
168       /* The SWZ instruction allows per-component zero/one swizzles, and also
169        * per-component negation.
170        */
171       nir_def *chans[4];
172       for (int i = 0; i < 4; i++) {
173          int swizzle = GET_SWZ(prog_src->Swizzle, i);
174          if (swizzle == SWIZZLE_ZERO) {
175             chans[i] = nir_imm_float(b, 0.0);
176          } else if (swizzle == SWIZZLE_ONE) {
177             chans[i] = nir_imm_float(b, 1.0);
178          } else {
179             assert(swizzle != SWIZZLE_NIL);
180             nir_alu_instr *mov = nir_alu_instr_create(b->shader, nir_op_mov);
181             nir_def_init(&mov->instr, &mov->def, 1, 32);
182             mov->src[0] = src;
183             mov->src[0].swizzle[0] = swizzle;
184             nir_builder_instr_insert(b, &mov->instr);
185 
186             chans[i] = &mov->def;
187          }
188 
189          if (prog_src->Negate & (1 << i))
190             chans[i] = nir_fneg(b, chans[i]);
191       }
192       def = nir_vec4(b, chans[0], chans[1], chans[2], chans[3]);
193    }
194 
195    return def;
196 }
197 
198 /* EXP - Approximate Exponential Base 2
199  *  dst.x = 2^{\lfloor src.x\rfloor}
200  *  dst.y = src.x - \lfloor src.x\rfloor
201  *  dst.z = 2^{src.x}
202  *  dst.w = 1.0
203  */
204 static nir_def *
ptn_exp(nir_builder * b,nir_def ** src)205 ptn_exp(nir_builder *b, nir_def **src)
206 {
207    nir_def *srcx = ptn_channel(b, src[0], X);
208 
209    return nir_vec4(b, nir_fexp2(b, nir_ffloor(b, srcx)),
210                       nir_fsub(b, srcx, nir_ffloor(b, srcx)),
211                       nir_fexp2(b, srcx),
212                       nir_imm_float(b, 1.0));
213 }
214 
215 /* LOG - Approximate Logarithm Base 2
216  *  dst.x = \lfloor\log_2{|src.x|}\rfloor
217  *  dst.y = \frac{|src.x|}{2^{\lfloor\log_2{|src.x|}\rfloor}}
218  *  dst.z = \log_2{|src.x|}
219  *  dst.w = 1.0
220  */
221 static nir_def *
ptn_log(nir_builder * b,nir_def ** src)222 ptn_log(nir_builder *b, nir_def **src)
223 {
224    nir_def *abs_srcx = nir_fabs(b, ptn_channel(b, src[0], X));
225    nir_def *log2 = nir_flog2(b, abs_srcx);
226 
227    return nir_vec4(b, nir_ffloor(b, log2),
228                       nir_fdiv(b, abs_srcx, nir_fexp2(b, nir_ffloor(b, log2))),
229                       nir_flog2(b, abs_srcx),
230                       nir_imm_float(b, 1.0));
231 }
232 
233 /* DST - Distance Vector
234  *   dst.x = 1.0
235  *   dst.y = src0.y \times src1.y
236  *   dst.z = src0.z
237  *   dst.w = src1.w
238  */
239 static nir_def *
ptn_dst(nir_builder * b,nir_def ** src)240 ptn_dst(nir_builder *b, nir_def **src)
241 {
242    return nir_vec4(b, nir_imm_float(b, 1.0),
243                       nir_fmul(b, ptn_channel(b, src[0], Y),
244                                   ptn_channel(b, src[1], Y)),
245                       ptn_channel(b, src[0], Z),
246                       ptn_channel(b, src[1], W));
247 }
248 
249 /* LIT - Light Coefficients
250  *  dst.x = 1.0
251  *  dst.y = max(src.x, 0.0)
252  *  dst.z = (src.x > 0.0) ? max(src.y, 0.0)^{clamp(src.w, -128.0, 128.0))} : 0
253  *  dst.w = 1.0
254  */
255 static nir_def *
ptn_lit(nir_builder * b,nir_def ** src)256 ptn_lit(nir_builder *b, nir_def **src)
257 {
258    nir_def *src0_y = ptn_channel(b, src[0], Y);
259    nir_def *wclamp = nir_fmax(b, nir_fmin(b, ptn_channel(b, src[0], W),
260                                               nir_imm_float(b, 128.0)),
261                                   nir_imm_float(b, -128.0));
262    nir_def *pow = nir_fpow(b, nir_fmax(b, src0_y, nir_imm_float(b, 0.0)),
263                                wclamp);
264 
265    nir_def *z = nir_bcsel(b, nir_fle_imm(b, ptn_channel(b, src[0], X), 0.0),
266                               nir_imm_float(b, 0.0), pow);
267 
268    return nir_vec4(b, nir_imm_float(b, 1.0),
269                       nir_fmax(b, ptn_channel(b, src[0], X),
270                                   nir_imm_float(b, 0.0)),
271                       z,
272                       nir_imm_float(b, 1.0));
273 }
274 
275 /* SCS - Sine Cosine
276  *   dst.x = \cos{src.x}
277  *   dst.y = \sin{src.x}
278  *   dst.z = 0.0
279  *   dst.w = 1.0
280  */
281 static nir_def *
ptn_scs(nir_builder * b,nir_def ** src)282 ptn_scs(nir_builder *b, nir_def **src)
283 {
284    return nir_vec4(b, nir_fcos(b, ptn_channel(b, src[0], X)),
285                       nir_fsin(b, ptn_channel(b, src[0], X)),
286                       nir_imm_float(b, 0.0),
287                       nir_imm_float(b, 1.0));
288 }
289 
290 static nir_def *
ptn_xpd(nir_builder * b,nir_def ** src)291 ptn_xpd(nir_builder *b, nir_def **src)
292 {
293    nir_def *vec =
294       nir_fsub(b, nir_fmul(b, nir_swizzle(b, src[0], SWIZ(Y, Z, X, W), 3),
295                               nir_swizzle(b, src[1], SWIZ(Z, X, Y, W), 3)),
296                   nir_fmul(b, nir_swizzle(b, src[1], SWIZ(Y, Z, X, W), 3),
297                               nir_swizzle(b, src[0], SWIZ(Z, X, Y, W), 3)));
298 
299    return nir_vec4(b, nir_channel(b, vec, 0),
300                       nir_channel(b, vec, 1),
301                       nir_channel(b, vec, 2),
302                       nir_imm_float(b, 1.0));
303 }
304 
305 static void
ptn_kil(nir_builder * b,nir_def ** src)306 ptn_kil(nir_builder *b, nir_def **src)
307 {
308    /* flt must be exact, because NaN shouldn't discard. (apps rely on this) */
309    b->exact = true;
310    nir_def *cmp = nir_bany(b, nir_flt_imm(b, src[0], 0.0));
311    b->exact = false;
312 
313    nir_discard_if(b, cmp);
314 }
315 
316 enum glsl_sampler_dim
_mesa_texture_index_to_sampler_dim(gl_texture_index index,bool * is_array)317 _mesa_texture_index_to_sampler_dim(gl_texture_index index, bool *is_array)
318 {
319    *is_array = false;
320 
321    switch (index) {
322    case TEXTURE_2D_MULTISAMPLE_INDEX:
323       return GLSL_SAMPLER_DIM_MS;
324    case TEXTURE_2D_MULTISAMPLE_ARRAY_INDEX:
325       *is_array = true;
326       return GLSL_SAMPLER_DIM_MS;
327    case TEXTURE_BUFFER_INDEX:
328       return GLSL_SAMPLER_DIM_BUF;
329    case TEXTURE_1D_INDEX:
330       return GLSL_SAMPLER_DIM_1D;
331    case TEXTURE_2D_INDEX:
332       return GLSL_SAMPLER_DIM_2D;
333    case TEXTURE_3D_INDEX:
334       return GLSL_SAMPLER_DIM_3D;
335    case TEXTURE_CUBE_INDEX:
336       return GLSL_SAMPLER_DIM_CUBE;
337    case TEXTURE_CUBE_ARRAY_INDEX:
338       *is_array = true;
339       return GLSL_SAMPLER_DIM_CUBE;
340    case TEXTURE_RECT_INDEX:
341       return GLSL_SAMPLER_DIM_RECT;
342    case TEXTURE_1D_ARRAY_INDEX:
343       *is_array = true;
344       return GLSL_SAMPLER_DIM_1D;
345    case TEXTURE_2D_ARRAY_INDEX:
346       *is_array = true;
347       return GLSL_SAMPLER_DIM_2D;
348    case TEXTURE_EXTERNAL_INDEX:
349       return GLSL_SAMPLER_DIM_EXTERNAL;
350    case NUM_TEXTURE_TARGETS:
351       break;
352    }
353    unreachable("unknown texture target");
354 }
355 
356 static nir_def *
ptn_tex(struct ptn_compile * c,nir_def ** src,struct prog_instruction * prog_inst)357 ptn_tex(struct ptn_compile *c, nir_def **src,
358         struct prog_instruction *prog_inst)
359 {
360    nir_builder *b = &c->build;
361    nir_tex_instr *instr;
362    nir_texop op;
363    unsigned num_srcs;
364 
365    switch (prog_inst->Opcode) {
366    case OPCODE_TEX:
367       op = nir_texop_tex;
368       num_srcs = 1;
369       break;
370    case OPCODE_TXB:
371       op = nir_texop_txb;
372       num_srcs = 2;
373       break;
374    case OPCODE_TXD:
375       op = nir_texop_txd;
376       num_srcs = 3;
377       break;
378    case OPCODE_TXL:
379       op = nir_texop_txl;
380       num_srcs = 2;
381       break;
382    case OPCODE_TXP:
383       op = nir_texop_tex;
384       num_srcs = 2;
385       break;
386    default:
387       fprintf(stderr, "unknown tex op %d\n", prog_inst->Opcode);
388       abort();
389    }
390 
391    /* Deref sources */
392    num_srcs += 2;
393 
394    if (prog_inst->TexShadow)
395       num_srcs++;
396 
397    instr = nir_tex_instr_create(b->shader, num_srcs);
398    instr->op = op;
399    instr->dest_type = nir_type_float32;
400    instr->is_shadow = prog_inst->TexShadow;
401 
402    bool is_array;
403    instr->sampler_dim = _mesa_texture_index_to_sampler_dim(prog_inst->TexSrcTarget, &is_array);
404 
405    instr->coord_components =
406       glsl_get_sampler_dim_coordinate_components(instr->sampler_dim);
407 
408    nir_variable *var = c->sampler_vars[prog_inst->TexSrcUnit];
409    if (!var) {
410       const struct glsl_type *type =
411          glsl_sampler_type(instr->sampler_dim, instr->is_shadow, false, GLSL_TYPE_FLOAT);
412       char samplerName[20];
413       snprintf(samplerName, sizeof(samplerName), "sampler_%d", prog_inst->TexSrcUnit);
414       var = nir_variable_create(b->shader, nir_var_uniform, type, samplerName);
415       var->data.binding = prog_inst->TexSrcUnit;
416       var->data.explicit_binding = true;
417       c->sampler_vars[prog_inst->TexSrcUnit] = var;
418    }
419 
420    nir_deref_instr *deref = nir_build_deref_var(b, var);
421 
422    unsigned src_number = 0;
423 
424    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_texture_deref,
425                                                 &deref->def);
426    src_number++;
427    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_sampler_deref,
428                                                 &deref->def);
429    src_number++;
430 
431    instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_coord,
432                                                 nir_trim_vector(b, src[0],
433                                                                 instr->coord_components));
434    src_number++;
435 
436    if (prog_inst->Opcode == OPCODE_TXP) {
437       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_projector,
438                                                    ptn_channel(b, src[0], W));
439       src_number++;
440    }
441 
442    if (prog_inst->Opcode == OPCODE_TXB) {
443       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_bias,
444                                                    ptn_channel(b, src[0], W));
445       src_number++;
446    }
447 
448    if (prog_inst->Opcode == OPCODE_TXL) {
449       instr->src[src_number] = nir_tex_src_for_ssa(nir_tex_src_lod,
450                                                    ptn_channel(b, src[0], W));
451       src_number++;
452    }
453 
454    if (instr->is_shadow) {
455       if (instr->coord_components < 3)
456          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], Z));
457       else
458          instr->src[src_number].src = nir_src_for_ssa(ptn_channel(b, src[0], W));
459 
460       instr->src[src_number].src_type = nir_tex_src_comparator;
461       src_number++;
462    }
463 
464    assert(src_number == num_srcs);
465 
466    nir_def_init(&instr->instr, &instr->def, 4, 32);
467    nir_builder_instr_insert(b, &instr->instr);
468 
469    return &instr->def;
470 }
471 
472 static const nir_op op_trans[MAX_OPCODE] = {
473    [OPCODE_NOP] = 0,
474    [OPCODE_ABS] = nir_op_fabs,
475    [OPCODE_ADD] = nir_op_fadd,
476    [OPCODE_ARL] = 0,
477    [OPCODE_CMP] = 0,
478    [OPCODE_COS] = 0,
479    [OPCODE_DDX] = nir_op_fddx,
480    [OPCODE_DDY] = nir_op_fddy,
481    [OPCODE_DP2] = 0,
482    [OPCODE_DP3] = 0,
483    [OPCODE_DP4] = 0,
484    [OPCODE_DPH] = 0,
485    [OPCODE_DST] = 0,
486    [OPCODE_END] = 0,
487    [OPCODE_EX2] = 0,
488    [OPCODE_EXP] = 0,
489    [OPCODE_FLR] = nir_op_ffloor,
490    [OPCODE_FRC] = nir_op_ffract,
491    [OPCODE_LG2] = 0,
492    [OPCODE_LIT] = 0,
493    [OPCODE_LOG] = 0,
494    [OPCODE_LRP] = 0,
495    [OPCODE_MAD] = 0,
496    [OPCODE_MAX] = nir_op_fmax,
497    [OPCODE_MIN] = nir_op_fmin,
498    [OPCODE_MOV] = nir_op_mov,
499    [OPCODE_MUL] = nir_op_fmul,
500    [OPCODE_POW] = 0,
501    [OPCODE_RCP] = 0,
502 
503    [OPCODE_RSQ] = 0,
504    [OPCODE_SCS] = 0,
505    [OPCODE_SGE] = 0,
506    [OPCODE_SIN] = 0,
507    [OPCODE_SLT] = 0,
508    [OPCODE_SSG] = nir_op_fsign,
509    [OPCODE_SUB] = nir_op_fsub,
510    [OPCODE_SWZ] = 0,
511    [OPCODE_TEX] = 0,
512    [OPCODE_TXB] = 0,
513    [OPCODE_TXD] = 0,
514    [OPCODE_TXL] = 0,
515    [OPCODE_TXP] = 0,
516    [OPCODE_XPD] = 0,
517 };
518 
519 static void
ptn_emit_instruction(struct ptn_compile * c,struct prog_instruction * prog_inst)520 ptn_emit_instruction(struct ptn_compile *c, struct prog_instruction *prog_inst)
521 {
522    nir_builder *b = &c->build;
523    unsigned i;
524    const unsigned op = prog_inst->Opcode;
525 
526    if (op == OPCODE_END)
527       return;
528 
529    nir_def *src[3];
530    for (i = 0; i < 3; i++) {
531       src[i] = ptn_get_src(c, &prog_inst->SrcReg[i]);
532    }
533 
534    nir_def *dst = NULL;
535    if (c->error)
536       return;
537 
538    switch (op) {
539    case OPCODE_RSQ:
540       dst = nir_frsq(b, nir_fabs(b, ptn_channel(b, src[0], X)));
541       break;
542 
543    case OPCODE_RCP:
544       dst = nir_frcp(b, ptn_channel(b, src[0], X));
545       break;
546 
547    case OPCODE_EX2:
548       dst = nir_fexp2(b, ptn_channel(b, src[0], X));
549       break;
550 
551    case OPCODE_LG2:
552       dst = nir_flog2(b, ptn_channel(b, src[0], X));
553       break;
554 
555    case OPCODE_POW:
556       dst = nir_fpow(b, ptn_channel(b, src[0], X), ptn_channel(b, src[1], X));
557       break;
558 
559    case OPCODE_COS:
560       dst = nir_fcos(b, ptn_channel(b, src[0], X));
561       break;
562 
563    case OPCODE_SIN:
564       dst = nir_fsin(b, ptn_channel(b, src[0], X));
565       break;
566 
567    case OPCODE_ARL:
568       dst = nir_f2i32(b, nir_ffloor(b, src[0]));
569       break;
570 
571    case OPCODE_EXP:
572       dst = ptn_exp(b, src);
573       break;
574 
575    case OPCODE_LOG:
576       dst = ptn_log(b, src);
577       break;
578 
579    case OPCODE_LRP:
580       dst = nir_flrp(b, src[2], src[1], src[0]);
581       break;
582 
583    case OPCODE_MAD:
584       dst = nir_fadd(b, nir_fmul(b, src[0], src[1]), src[2]);
585       break;
586 
587    case OPCODE_DST:
588       dst = ptn_dst(b, src);
589       break;
590 
591    case OPCODE_LIT:
592       dst = ptn_lit(b, src);
593       break;
594 
595    case OPCODE_XPD:
596       dst = ptn_xpd(b, src);
597       break;
598 
599    case OPCODE_DP2:
600       dst = nir_fdot2(b, src[0], src[1]);
601       break;
602 
603    case OPCODE_DP3:
604       dst = nir_fdot3(b, src[0], src[1]);
605       break;
606 
607    case OPCODE_DP4:
608       dst = nir_fdot4(b, src[0], src[1]);
609       break;
610 
611    case OPCODE_DPH:
612       dst = nir_fdph(b, src[0], src[1]);
613       break;
614 
615    case OPCODE_KIL:
616       ptn_kil(b, src);
617       break;
618 
619    case OPCODE_CMP:
620       dst = nir_bcsel(b, nir_flt_imm(b, src[0], 0.0), src[1], src[2]);
621       break;
622 
623    case OPCODE_SCS:
624       dst = ptn_scs(b, src);
625       break;
626 
627    case OPCODE_SLT:
628       dst = nir_slt(b, src[0], src[1]);
629       break;
630 
631    case OPCODE_SGE:
632       dst = nir_sge(b, src[0], src[1]);
633       break;
634 
635    case OPCODE_TEX:
636    case OPCODE_TXB:
637    case OPCODE_TXD:
638    case OPCODE_TXL:
639    case OPCODE_TXP:
640       dst = ptn_tex(c, src, prog_inst);
641       break;
642 
643    case OPCODE_SWZ:
644       /* Extended swizzles were already handled in ptn_get_src(). */
645       dst = nir_build_alu_src_arr(b, nir_op_mov, src);
646       break;
647 
648    case OPCODE_NOP:
649       break;
650 
651    default:
652       if (op_trans[op] != 0) {
653          dst = nir_build_alu_src_arr(b, op_trans[op], src);
654       } else {
655          fprintf(stderr, "unknown opcode: %s\n", _mesa_opcode_string(op));
656          abort();
657       }
658       break;
659    }
660 
661    if (dst == NULL)
662       return;
663 
664    if (dst->num_components == 1)
665       dst = nir_replicate(b, dst, 4);
666 
667    assert(dst->num_components == 4);
668 
669    if (prog_inst->Saturate)
670       dst = nir_fsat(b, dst);
671 
672    const struct prog_dst_register *prog_dst = &prog_inst->DstReg;
673    assert(!prog_dst->RelAddr);
674 
675    nir_def *reg = NULL;
676    unsigned write_mask = prog_dst->WriteMask;
677 
678    switch (prog_dst->File) {
679    case PROGRAM_TEMPORARY:
680       reg = c->temp_regs[prog_dst->Index];
681       break;
682    case PROGRAM_OUTPUT:
683       reg = c->output_regs[prog_dst->Index];
684       break;
685    case PROGRAM_ADDRESS:
686       assert(prog_dst->Index == 0);
687       reg = c->addr_reg;
688 
689       /* The address register (uniquely) is scalar. */
690       dst = nir_channel(b, dst, 0);
691       write_mask &= 1;
692       break;
693    case PROGRAM_UNDEFINED:
694       return;
695    }
696 
697    /* In case there was some silly .y write to the scalar address reg */
698    if (write_mask == 0)
699       return;
700 
701    assert(reg != NULL);
702    nir_build_store_reg(b, dst, reg, .write_mask = write_mask);
703 }
704 
705 /**
706  * Puts a NIR intrinsic to store of each PROGRAM_OUTPUT value to the output
707  * variables at the end of the shader.
708  *
709  * We don't generate these incrementally as the PROGRAM_OUTPUT values are
710  * written, because there's no output load intrinsic, which means we couldn't
711  * handle writemasks.
712  */
713 static void
ptn_add_output_stores(struct ptn_compile * c)714 ptn_add_output_stores(struct ptn_compile *c)
715 {
716    nir_builder *b = &c->build;
717 
718    nir_foreach_shader_out_variable(var, b->shader) {
719       nir_def *src = nir_load_reg(b, c->output_regs[var->data.location]);
720       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB &&
721           var->data.location == FRAG_RESULT_DEPTH) {
722          /* result.depth has this strange convention of being the .z component of
723           * a vec4 with undefined .xyw components.  We resolve it to a scalar, to
724           * match GLSL's gl_FragDepth and the expectations of most backends.
725           */
726          src = nir_channel(b, src, 2);
727       }
728       if (c->prog->Target == GL_VERTEX_PROGRAM_ARB &&
729           (var->data.location == VARYING_SLOT_FOGC ||
730            var->data.location == VARYING_SLOT_PSIZ)) {
731          /* result.{fogcoord,psiz} is a single component value */
732          src = nir_channel(b, src, 0);
733       }
734       unsigned num_components = glsl_get_vector_elements(var->type);
735       nir_store_var(b, var, src, (1 << num_components) - 1);
736    }
737 }
738 
739 static void
setup_registers_and_variables(struct ptn_compile * c)740 setup_registers_and_variables(struct ptn_compile *c)
741 {
742    nir_builder *b = &c->build;
743    struct nir_shader *shader = b->shader;
744 
745    /* Create input variables. */
746    uint64_t inputs_read = c->prog->info.inputs_read;
747    while (inputs_read) {
748       const int i = u_bit_scan64(&inputs_read);
749 
750       if (c->ctx->Const.GLSLFragCoordIsSysVal &&
751           shader->info.stage == MESA_SHADER_FRAGMENT &&
752           i == VARYING_SLOT_POS) {
753          c->input_vars[i] = nir_create_variable_with_location(shader, nir_var_system_value,
754                                                               SYSTEM_VALUE_FRAG_COORD, glsl_vec4_type());
755          continue;
756       }
757 
758       nir_variable *var =
759           nir_create_variable_with_location(shader, nir_var_shader_in,
760                                             i, glsl_vec4_type());
761 
762       if (c->prog->Target == GL_FRAGMENT_PROGRAM_ARB) {
763          if (i == VARYING_SLOT_FOGC) {
764             /* fogcoord is defined as <f, 0.0, 0.0, 1.0>.  Make the actual
765              * input variable a float, and create a local containing the
766              * full vec4 value.
767              */
768             var->type = glsl_float_type();
769 
770             nir_variable *fullvar =
771                nir_local_variable_create(b->impl, glsl_vec4_type(),
772                                          "fogcoord_tmp");
773 
774             nir_store_var(b, fullvar,
775                           nir_vec4(b, nir_load_var(b, var),
776                                    nir_imm_float(b, 0.0),
777                                    nir_imm_float(b, 0.0),
778                                    nir_imm_float(b, 1.0)),
779                           WRITEMASK_XYZW);
780 
781             /* We inserted the real input into the list so the driver has real
782              * inputs, but we set c->input_vars[i] to the temporary so we use
783              * the splatted value.
784              */
785             c->input_vars[i] = fullvar;
786             continue;
787          }
788       }
789 
790       c->input_vars[i] = var;
791    }
792 
793    /* Create system value variables */
794    int i;
795    BITSET_FOREACH_SET(i, c->prog->info.system_values_read, SYSTEM_VALUE_MAX) {
796       c->sysval_vars[i] = nir_create_variable_with_location(b->shader, nir_var_system_value,
797                                                             i, glsl_vec4_type());
798    }
799 
800    /* Create output registers and variables. */
801    int max_outputs = util_last_bit64(c->prog->info.outputs_written);
802    c->output_regs = rzalloc_array(c, nir_def *, max_outputs);
803 
804    uint64_t outputs_written = c->prog->info.outputs_written;
805    while (outputs_written) {
806       const int i = u_bit_scan64(&outputs_written);
807 
808       /* Since we can't load from outputs in the IR, we make temporaries
809        * for the outputs and emit stores to the real outputs at the end of
810        * the shader.
811        */
812       nir_def *reg = nir_decl_reg(b, 4, 32, 0);
813 
814       const struct glsl_type *type;
815       if ((c->prog->Target == GL_FRAGMENT_PROGRAM_ARB && i == FRAG_RESULT_DEPTH) ||
816           (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_FOGC) ||
817           (c->prog->Target == GL_VERTEX_PROGRAM_ARB && i == VARYING_SLOT_PSIZ))
818          type = glsl_float_type();
819       else
820          type = glsl_vec4_type();
821 
822       nir_variable *var =
823          nir_variable_create(shader, nir_var_shader_out, type,
824                              ralloc_asprintf(shader, "out_%d", i));
825       var->data.location = i;
826       var->data.index = 0;
827 
828       c->output_regs[i] = reg;
829       c->output_vars[i] = var;
830    }
831 
832    /* Create temporary registers. */
833    c->temp_regs = rzalloc_array(c, nir_def *,
834                                 c->prog->arb.NumTemporaries);
835 
836    for (unsigned i = 0; i < c->prog->arb.NumTemporaries; i++) {
837       c->temp_regs[i] = nir_decl_reg(b, 4, 32, 0);
838    }
839 
840    /* Create the address register (for ARB_vertex_program). This is uniquely a
841     * scalar, requiring special handling for stores.
842     */
843    c->addr_reg = nir_decl_reg(b, 1, 32, 0);
844 }
845 
846 struct nir_shader *
prog_to_nir(const struct gl_context * ctx,const struct gl_program * prog,const nir_shader_compiler_options * options)847 prog_to_nir(const struct gl_context *ctx, const struct gl_program *prog,
848             const nir_shader_compiler_options *options)
849 {
850    struct ptn_compile *c;
851    struct nir_shader *s;
852    gl_shader_stage stage = _mesa_program_enum_to_shader_stage(prog->Target);
853 
854    c = rzalloc(NULL, struct ptn_compile);
855    if (!c)
856       return NULL;
857    c->prog = prog;
858    c->ctx = ctx;
859 
860    c->build = nir_builder_init_simple_shader(stage, options, NULL);
861 
862    /* Copy the shader_info from the gl_program */
863    c->build.shader->info = prog->info;
864 
865    s = c->build.shader;
866 
867    if (prog->Parameters->NumParameters > 0) {
868       const struct glsl_type *type =
869          glsl_array_type(glsl_vec4_type(), prog->Parameters->NumParameters, 0);
870       c->parameters =
871          nir_variable_create(s, nir_var_uniform, type,
872                              prog->Parameters->Parameters[0].Name);
873    }
874 
875    setup_registers_and_variables(c);
876    if (unlikely(c->error))
877       goto fail;
878 
879    for (unsigned int i = 0; i < prog->arb.NumInstructions; i++) {
880       ptn_emit_instruction(c, &prog->arb.Instructions[i]);
881 
882       if (unlikely(c->error))
883          break;
884    }
885 
886    ptn_add_output_stores(c);
887 
888    s->info.name = ralloc_asprintf(s, "ARB%d", prog->Id);
889    s->info.num_textures = util_last_bit(prog->SamplersUsed);
890    s->info.num_ubos = 0;
891    s->info.num_abos = 0;
892    s->info.num_ssbos = 0;
893    s->info.num_images = 0;
894    s->info.uses_texture_gather = false;
895    s->info.clip_distance_array_size = 0;
896    s->info.cull_distance_array_size = 0;
897    s->info.separate_shader = true;
898    s->info.io_lowered = false;
899    s->info.internal = false;
900 
901    /* ARB_vp: */
902    if (prog->arb.IsPositionInvariant) {
903       NIR_PASS(_, s, st_nir_lower_position_invariant,
904                  ctx->Const.ShaderCompilerOptions[MESA_SHADER_VERTEX].OptimizeForAOS,
905                  prog->Parameters);
906    }
907 
908    /* Add OPTION ARB_fog_exp code */
909    if (prog->arb.Fog)
910       NIR_PASS(_, s, st_nir_lower_fog, prog->arb.Fog, prog->Parameters);
911 
912 fail:
913    if (c->error) {
914       ralloc_free(s);
915       s = NULL;
916    }
917    ralloc_free(c);
918    return s;
919 }
920