xref: /aosp_15_r20/external/mesa3d/src/gallium/drivers/lima/ir/pp/nir.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright (c) 2017 Lima Project
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sub license,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the
12  * next paragraph) shall be included in all copies or substantial portions
13  * of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  *
23  */
24 
25 #include <string.h>
26 
27 #include "util/hash_table.h"
28 #include "util/ralloc.h"
29 #include "util/bitscan.h"
30 #include "compiler/nir/nir.h"
31 #include "pipe/p_state.h"
32 
33 
34 #include "ppir.h"
35 
/* Create a ppir node whose destination is the given NIR SSA def.
 * The destination writes all components of the def.
 * Returns the new node, or NULL on allocation failure. */
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_def *ssa)
{
   ppir_node *node = ppir_node_create(block, op, ssa->index, 0);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);
   dest->type = ppir_target_ssa;
   dest->ssa.num_components = ssa->num_components;
   /* write mask covers every component of the def */
   dest->write_mask = u_bit_consecutive(0, ssa->num_components);

   /* NOTE(review): loads/stores flag the dest as "head" — presumably so
    * register allocation keeps the vector base component fixed; confirm
    * the exact meaning in ppir.h */
   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->ssa.is_head = true;

   return node;
}
53 
/* Create a ppir node whose destination is a NIR register.
 * The backing ppir_reg is looked up by index in the compiler's reg_list
 * and bound as the destination register.
 * Returns the new node, or NULL on allocation failure. */
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_def *def, unsigned mask)
{
   ppir_node *node = ppir_node_create(block, op, def->index, mask);
   if (!node)
      return NULL;

   ppir_dest *dest = ppir_node_get_dest(node);

   /* find the ppir_reg matching this register index — assumes the reg
    * was added to reg_list earlier in compilation */
   list_for_each_entry(ppir_reg, r, &block->comp->reg_list, list) {
      if (r->index == def->index) {
         dest->reg = r;
         break;
      }
   }

   dest->type = ppir_target_register;
   dest->write_mask = mask;

   /* NOTE(review): loads/stores mark the register as "head" — presumably
    * a register-allocation constraint; confirm in ppir.h */
   if (node->type == ppir_node_type_load ||
       node->type == ppir_node_type_store)
      dest->reg->is_head = true;

   return node;
}
79 
/* Create a ppir node for a NIR destination.
 * Dispatches to the SSA or register variant depending on whether the
 * def feeds a store_reg; a NULL def yields a destination-less node. */
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_def *def, unsigned mask)
{
   /* No destination at all: node gets an invalid (-1) index. */
   if (!def)
      return ppir_node_create(block, op, -1, 0);

   nir_intrinsic_instr *store = nir_store_reg_for_def(def);
   if (store)
      return ppir_node_create_reg(block, op, store->src[1].ssa,
                                  nir_intrinsic_write_mask(store));

   /* Plain SSA destination. */
   return ppir_node_create_ssa(block, op, def);
}
93 
/* Resolve one NIR source into its producing ppir node(s) and record the
 * dependency edge(s) on 'node'.
 *
 * SSA sources map 1:1 through comp->var_nodes.  Register sources use
 * 4 var_nodes slots per register index (index << 2), one per component;
 * each component selected by 'mask' (through the swizzle) is resolved
 * separately.  Components read before any write get a dummy placeholder
 * node so the source target can still be assigned. */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;
   nir_intrinsic_instr *load = nir_load_reg_for_def(ns->ssa);

   if (!load) { /* is ssa */
      child = comp->var_nodes[ns->ssa->index];
      /* undef produces no value worth ordering against */
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      nir_def *rs = load->src[0].ssa;
      while (mask) {
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         /* per-component slot: 4 entries per register index */
         child = comp->var_nodes[(rs->index << 2) + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, rs,
               u_bit_consecutive(0, 4));
            comp->var_nodes[(rs->index << 2) + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   /* child holds the last resolved component's producer */
   assert(child);
   ppir_node_target_assign(ps, child);
}
125 
/* NIR ALU opcode -> ppir opcode translation table.  Entries not listed
 * are zero-initialized, which ppir_emit_alu treats as
 * ppir_op_unsupported (presumably the zero enum value — see the
 * comparison there). */
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fclamp_pos_mali] = ppir_op_clamp_pos,
};
156 
/* Translate one NIR ALU instruction into a ppir ALU node and append it
 * to the block's node list.  Returns false for opcodes with no ppir
 * mapping or on allocation failure. */
static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
{
   nir_alu_instr *instr = nir_instr_as_alu(ni);
   nir_def *def = &instr->def;
   int op = nir_to_ppir_opcodes[instr->op];

   if (op == ppir_op_unsupported) {
      ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
      return false;
   }
   unsigned mask = nir_component_mask(def->num_components);
   ppir_alu_node *node = ppir_node_create_dest(block, op, def, mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;

   /* horizontal sums read a fixed set of source components regardless
    * of the destination write mask */
   unsigned src_mask;
   switch (op) {
   case ppir_op_sum3:
      src_mask = 0b0111;
      break;
   case ppir_op_sum4:
      src_mask = 0b1111;
      break;
   default:
      src_mask = pd->write_mask;
      break;
   }

   unsigned num_child = nir_op_infos[instr->op].num_inputs;
   node->num_src = num_child;

   /* copy each source's swizzle and wire up its dependency */
   for (int i = 0; i < num_child; i++) {
      nir_alu_src *alu_src = instr->src + i;
      ppir_src *ps = node->src + i;
      memcpy(ps->swizzle, alu_src->swizzle, sizeof(ps->swizzle));
      ppir_node_add_src(block->comp, &node->node, ps, &alu_src->src, src_mask);
   }

   list_addtail(&node->node.list, &block->node_list);
   return true;
}
200 
/* Emit a ddx/ddy ALU node from the corresponding NIR intrinsic.
 * The single source is read with an identity swizzle over the
 * destination's write mask. */
static bool ppir_emit_derivative(ppir_block *block, nir_instr *ni, int op)
{
   assert(op == ppir_op_ddx || op == ppir_op_ddy);

   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   nir_def *def = &instr->def;

   unsigned mask = nir_component_mask(def->num_components);
   ppir_alu_node *node = ppir_node_create_dest(block, op, def, mask);
   if (!node)
      return false;

   ppir_dest *pd = &node->dest;
   unsigned src_mask = pd->write_mask;
   /* identity swizzle: derivatives read components unshuffled */
   uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
                           PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };

   node->num_src = 1;
   nir_src *intr_src = instr->src;
   ppir_src *ps = node->src;
   memcpy(ps->swizzle, identity, sizeof(identity));
   ppir_node_add_src(block->comp, &node->node, ps, intr_src, src_mask);

   list_addtail(&node->node.list, &block->node_list);
   return true;
}
227 
228 static ppir_block *ppir_block_create(ppir_compiler *comp);
229 
ppir_emit_discard_block(ppir_compiler * comp)230 static bool ppir_emit_discard_block(ppir_compiler *comp)
231 {
232    ppir_block *block = ppir_block_create(comp);
233    ppir_discard_node *discard;
234    if (!block)
235       return false;
236 
237    comp->discard_block = block;
238    block->comp  = comp;
239 
240    discard = ppir_node_create(block, ppir_op_discard, -1, 0);
241    if (discard)
242       list_addtail(&discard->node.list, &block->node_list);
243    else
244       return false;
245 
246    return true;
247 }
248 
/* Emit a conditional branch to the shared discard block for
 * terminate_if.  The discard block is created lazily on first use.
 * Returns the branch node, or NULL on allocation failure. */
static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
{
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;

   if (!comp->discard_block && !ppir_emit_discard_block(comp))
      return NULL;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return NULL;
   branch = ppir_node_to_branch(node);

   /* second src and condition will be updated during lowering */
   ppir_node_add_src(block->comp, node, &branch->src[0],
                     &instr->src[0], u_bit_consecutive(0, instr->num_components));
   branch->num_src = 1;
   branch->target = comp->discard_block;

   return node;
}
272 
ppir_emit_discard(ppir_block * block,nir_instr * ni)273 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
274 {
275    ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
276 
277    return node;
278 }
279 
ppir_emit_intrinsic(ppir_block * block,nir_instr * ni)280 static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
281 {
282    ppir_node *node;
283    nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
284    unsigned mask = 0;
285    ppir_load_node *lnode;
286    ppir_alu_node *alu_node;
287 
288    switch (instr->intrinsic) {
289    case nir_intrinsic_decl_reg:
290    case nir_intrinsic_store_reg:
291       /* Nothing to do for these */
292       return true;
293 
294    case nir_intrinsic_load_reg: {
295       lnode = ppir_node_create_dest(block, ppir_op_dummy, &instr->def, mask);
296       return true;
297    }
298 
299    case nir_intrinsic_load_input: {
300       mask = u_bit_consecutive(0, instr->num_components);
301 
302       lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->def, mask);
303       if (!lnode)
304          return false;
305 
306       lnode->num_components = instr->num_components;
307       lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
308       if (nir_src_is_const(instr->src[0]))
309          lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
310       else {
311          lnode->num_src = 1;
312          ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
313       }
314       list_addtail(&lnode->node.list, &block->node_list);
315       return true;
316    }
317 
318    case nir_intrinsic_load_frag_coord:
319    case nir_intrinsic_load_point_coord:
320    case nir_intrinsic_load_front_face: {
321       mask = u_bit_consecutive(0, instr->num_components);
322 
323       ppir_op op;
324       switch (instr->intrinsic) {
325       case nir_intrinsic_load_frag_coord:
326          op = ppir_op_load_fragcoord;
327          break;
328       case nir_intrinsic_load_point_coord:
329          op = ppir_op_load_pointcoord;
330          break;
331       case nir_intrinsic_load_front_face:
332          op = ppir_op_load_frontface;
333          break;
334       default:
335          unreachable("bad intrinsic");
336          break;
337       }
338 
339       lnode = ppir_node_create_dest(block, op, &instr->def, mask);
340       if (!lnode)
341          return false;
342 
343       lnode->num_components = instr->num_components;
344       list_addtail(&lnode->node.list, &block->node_list);
345       return true;
346    }
347 
348    case nir_intrinsic_load_uniform: {
349       mask = u_bit_consecutive(0, instr->num_components);
350 
351       lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->def, mask);
352       if (!lnode)
353          return false;
354 
355       lnode->num_components = instr->num_components;
356       lnode->index = nir_intrinsic_base(instr);
357       if (nir_src_is_const(instr->src[0]))
358          lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
359       else {
360          lnode->num_src = 1;
361          ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
362       }
363 
364       list_addtail(&lnode->node.list, &block->node_list);
365       return true;
366    }
367 
368    case nir_intrinsic_store_output: {
369       /* In simple cases where the store_output is ssa, that register
370        * can be directly marked as the output.
371        * If discard is used or the source is not ssa, things can get a
372        * lot more complicated, so don't try to optimize those and fall
373        * back to inserting a mov at the end.
374        * If the source node will only be able to output to pipeline
375        * registers, fall back to the mov as well. */
376       assert(nir_src_is_const(instr->src[1]) &&
377              "lima doesn't support indirect outputs");
378 
379       nir_io_semantics io = nir_intrinsic_io_semantics(instr);
380       unsigned offset = nir_src_as_uint(instr->src[1]);
381       unsigned slot = io.location + offset;
382       ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
383          block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
384       if (out_type == ppir_output_invalid) {
385          ppir_debug("Unsupported output type: %d\n", slot);
386          return false;
387       }
388 
389       if (!block->comp->uses_discard) {
390          node = block->comp->var_nodes[instr->src->ssa->index];
391          assert(node);
392          switch (node->op) {
393          case ppir_op_load_uniform:
394          case ppir_op_load_texture:
395          case ppir_op_dummy:
396          case ppir_op_const:
397             break;
398          default: {
399             ppir_dest *dest = ppir_node_get_dest(node);
400             dest->ssa.out_type = out_type;
401             dest->ssa.num_components = 4;
402             dest->write_mask = u_bit_consecutive(0, 4);
403             node->is_out = 1;
404             return true;
405             }
406          }
407       }
408 
409       alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
410       if (!alu_node)
411          return false;
412 
413       ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
414       dest->type = ppir_target_ssa;
415       dest->ssa.num_components = 4;
416       dest->ssa.index = 0;
417       dest->write_mask = u_bit_consecutive(0, 4);
418       dest->ssa.out_type = out_type;
419 
420       alu_node->num_src = 1;
421 
422       for (int i = 0; i < instr->num_components; i++)
423          alu_node->src[0].swizzle[i] = i;
424 
425       ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
426                         u_bit_consecutive(0, 4));
427 
428       alu_node->node.is_out = 1;
429 
430       list_addtail(&alu_node->node.list, &block->node_list);
431       return true;
432    }
433 
434    case nir_intrinsic_terminate:
435       node = ppir_emit_discard(block, ni);
436       list_addtail(&node->list, &block->node_list);
437       return true;
438 
439    case nir_intrinsic_terminate_if:
440       node = ppir_emit_discard_if(block, ni);
441       list_addtail(&node->list, &block->node_list);
442       return true;
443 
444    case nir_intrinsic_ddx:
445       return ppir_emit_derivative(block, ni, ppir_op_ddx);
446    case nir_intrinsic_ddy:
447       return ppir_emit_derivative(block, ni, ppir_op_ddy);
448 
449    default:
450       ppir_error("unsupported nir_intrinsic_instr %s\n",
451                  nir_intrinsic_infos[instr->intrinsic].name);
452       return false;
453    }
454 }
455 
/* Emit a constant node for a NIR load_const.
 * Only 32-bit constants are supported (asserted). */
static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
{
   nir_load_const_instr *instr = nir_instr_as_load_const(ni);
   ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
   if (!node)
      return false;

   assert(instr->def.bit_size == 32);

   /* copy raw 32-bit values component by component */
   for (int i = 0; i < instr->def.num_components; i++)
      node->constant.value[i].i = instr->value[i].i32;
   node->constant.num = instr->def.num_components;

   list_addtail(&node->node.list, &block->node_list);
   return true;
}
472 
/* Emit a placeholder node for an SSA undef.  The destination is flagged
 * undef — presumably so later passes (e.g. register allocation) can
 * skip it; ppir_node_add_src also avoids dep edges for undef nodes. */
static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
{
   nir_undef_instr *undef = nir_instr_as_undef(ni);
   ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
   if (!node)
      return false;
   ppir_alu_node *alu = ppir_node_to_alu(node);

   ppir_dest *dest = &alu->dest;
   dest->ssa.undef = true;

   list_addtail(&node->list, &block->node_list);
   return true;
}
487 
/* Translate a NIR texture instruction into a ppir load_texture node fed
 * by a load_coords node through a pipeline register.
 * Only tex/txb/txl on basic sampler dimensions are supported. */
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->def, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   /* identity swizzle over the coordinate components */
   for (int i = 0; i < instr->coord_components; i++)
         node->src[0].swizzle[i] = i;

   bool perspective = false;

   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_backend1:
         /* backend1 coords need perspective division (see below) */
         perspective = true;
         FALLTHROUGH;
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         ppir_node *child = block->comp->var_nodes[ns->ssa->index];
         if (child->op == ppir_op_load_varying) {
            /* If the successor is load_texture, promote it to load_coords */
            nir_tex_src *nts = (nir_tex_src *)ns;
            if (nts->src_type == nir_tex_src_coord ||
                nts->src_type == nir_tex_src_backend1)
               child->op = ppir_op_load_coords;
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   /* reuse the promoted load_coords node only when this ld_tex is its
    * sole consumer; otherwise a dedicated load_coords_reg is inserted */
   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      load->num_components = instr->coord_components;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __func__, load->index, node->node.index);

      /* re-route ld_tex's predecessors onto the new load_coords node,
       * then make ld_tex depend on load_coords */
      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);

   if (perspective) {
      /* 3-component coords divide by z, otherwise by w */
      if (instr->coord_components == 3)
         load->perspective = ppir_perspective_z;
      else
         load->perspective = ppir_perspective_w;
   }

   load->sampler_dim = instr->sampler_dim;
   /* coords flow through a pipeline register, not a general register */
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}
616 
ppir_get_block(ppir_compiler * comp,nir_block * nblock)617 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
618 {
619    ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);
620 
621    return block;
622 }
623 
/* Emit an unconditional branch for a NIR jump.
 * break targets the current block's single successor; continue targets
 * the enclosing loop's continue block.  Other jump types fail. */
static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   ppir_compiler *comp = block->comp;
   ppir_branch_node *branch;
   ppir_block *jump_block;
   nir_jump_instr *jump = nir_instr_as_jump(ni);

   switch (jump->type) {
   case nir_jump_break: {
      /* a break block must have exactly one successor */
      assert(comp->current_block->successors[0]);
      assert(!comp->current_block->successors[1]);
      jump_block = comp->current_block->successors[0];
   }
   break;
   case nir_jump_continue:
      jump_block = comp->loop_cont_block;
   break;
   default:
      ppir_error("nir_jump_instr not support\n");
      return false;
   }

   assert(jump_block != NULL);

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   branch = ppir_node_to_branch(node);

   /* Unconditional */
   branch->num_src = 0;
   branch->target = jump_block;

   list_addtail(&node->list, &block->node_list);
   return true;
}
661 
/* Per-instruction-type emit handlers, indexed by nir_instr_type.
 * Sized to exclude phi instructions — ppir_emit_block asserts
 * instr->type < nir_instr_type_phi before dispatching. */
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_undef]      = ppir_emit_ssa_undef,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_jump]       = ppir_emit_jump,
};
670 
ppir_block_create(ppir_compiler * comp)671 static ppir_block *ppir_block_create(ppir_compiler *comp)
672 {
673    ppir_block *block = rzalloc(comp, ppir_block);
674    if (!block)
675       return NULL;
676 
677    list_inithead(&block->node_list);
678    list_inithead(&block->instr_list);
679 
680    block->comp = comp;
681 
682    return block;
683 }
684 
ppir_emit_block(ppir_compiler * comp,nir_block * nblock)685 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
686 {
687    ppir_block *block = ppir_get_block(comp, nblock);
688 
689    comp->current_block = block;
690 
691    list_addtail(&block->list, &comp->block_list);
692 
693    nir_foreach_instr(instr, nblock) {
694       assert(instr->type < nir_instr_type_phi);
695       if (!ppir_emit_instr[instr->type](block, instr))
696          return false;
697    }
698 
699    return true;
700 }
701 
702 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
703 
/* Emit control flow for a NIR if statement: a negated conditional
 * branch around the then-list, plus (when the else-list is non-empty)
 * an unconditional branch from the end of the then-list past the
 * else-list.  Branch targets are fixed up later where noted. */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   /* else-list is empty when it is a single instruction-less block */
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
      exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      /* jump straight to the block after the if */
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}
769 
/* Emit a NIR loop: emit the body, then add an unconditional back-edge
 * branch from the loop's last block to its first.  loop_cont_block is
 * saved and restored around the body to support nested loops. */
static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
{
   assert(!nir_loop_has_continue_construct(nloop));
   ppir_block *save_loop_cont_block = comp->loop_cont_block;
   ppir_block *block;
   ppir_branch_node *loop_branch;
   nir_block *loop_last_block;
   ppir_node *node;

   /* continue jumps (see ppir_emit_jump) target the loop's first block */
   comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));

   if (!ppir_emit_cf_list(comp, &nloop->body))
      return false;

   loop_last_block = nir_loop_last_block(nloop);
   block = ppir_get_block(comp, loop_last_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   loop_branch = ppir_node_to_branch(node);
   /* Unconditional */
   loop_branch->num_src = 0;
   loop_branch->target = comp->loop_cont_block;
   list_addtail(&loop_branch->node.list, &block->node_list);

   comp->loop_cont_block = save_loop_cont_block;

   comp->num_loops++;

   return true;
}
801 
ppir_emit_function(ppir_compiler * comp,nir_function_impl * nfunc)802 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
803 {
804    ppir_error("function nir_cf_node not support\n");
805    return false;
806 }
807 
ppir_emit_cf_list(ppir_compiler * comp,struct exec_list * list)808 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
809 {
810    foreach_list_typed(nir_cf_node, node, node, list) {
811       bool ret;
812 
813       switch (node->type) {
814       case nir_cf_node_block:
815          ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
816          break;
817       case nir_cf_node_if:
818          ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
819          break;
820       case nir_cf_node_loop:
821          ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
822          break;
823       case nir_cf_node_function:
824          ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
825          break;
826       default:
827          ppir_error("unknown NIR node type %d\n", node->type);
828          return false;
829       }
830 
831       if (!ret)
832          return false;
833    }
834 
835    return true;
836 }
837 
/* Allocate and initialize a compiler context (ralloc'd under prog).
 * The var_nodes array is carved out of the same allocation, right after
 * the struct, with 4 slots per SSA index (num_ssa << 2) so register
 * components can each track their own producing node (see
 * ppir_node_add_src).  Returns NULL on allocation failure. */
static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_ssa)
{
   ppir_compiler *comp = rzalloc_size(
      prog, sizeof(*comp) + (num_ssa << 2) * sizeof(ppir_node *));
   if (!comp)
      return NULL;

   list_inithead(&comp->block_list);
   list_inithead(&comp->reg_list);
   comp->reg_num = 0;
   comp->blocks = _mesa_hash_table_u64_create(prog);

   /* var_nodes lives directly after the struct; rzalloc zeroed it */
   comp->var_nodes = (ppir_node **)(comp + 1);
   comp->prog = prog;

   return comp;
}
855 
/* Add sequence dependencies so ordering-sensitive nodes keep their
 * source order through scheduling. */
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instructions order. Consider discard_if and the is_end node as
    * example. If we don't add fake dependency of discard_if to is_end,
    * scheduler may put the is_end first and since is_end terminates
    * shader on Utgard PP, rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s3
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *prev_node = NULL;
      /* walk bottom-up: every root (dependency-less) node is sequenced
       * before the nearest ordering-sensitive node that follows it */
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         if (prev_node && ppir_node_is_root(node) && node->op != ppir_op_const) {
            ppir_node_add_dep(prev_node, node, ppir_dep_sequence);
         }
         /* outputs, discards, temp stores and branches anchor ordering */
         if (node->is_out ||
             node->op == ppir_op_discard ||
             node->op == ppir_op_store_temp ||
             node->op == ppir_op_branch) {
            prev_node = node;
         }
      }
   }
}
897 
/* Emit a one-line shader-db statistics record for this compile, both to
 * stderr (when LIMA_DEBUG_SHADERDB is set) and to the state tracker's
 * debug callback. */
static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   /* On asprintf failure the contents of shaderdb are undefined; the old
    * assert() compiled away under NDEBUG and the pointer was still used.
    * Skip the (purely informational) report instead. */
   if (ret < 0)
      return;

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
918 
/* Add write-after-read (anti) dependencies for PP registers.
 *
 * For every register, walk each block backwards tracking the most
 * recently seen write to it; any earlier node that reads the register
 * must complete before that write, so the write gets a dep on the
 * reader.  This keeps the scheduler from clobbering a register while
 * an older read of it is still pending. */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *last_write = NULL;

         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            int num_src = ppir_node_get_src_num(node);
            for (int i = 0; i < num_src; i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               bool reads_reg = src &&
                                src->type == ppir_target_register &&
                                src->reg == reg;
               if (reads_reg && last_write) {
                  ppir_debug("Adding dep %d for write %d\n", node->index, last_write->index);
                  ppir_node_add_dep(last_write, node, ppir_dep_write_after_read);
               }
            }

            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               last_write = node;
         }
      }
   }
}
942 
ppir_compile_nir(struct lima_fs_compiled_shader * prog,struct nir_shader * nir,struct ra_regs * ra,struct util_debug_callback * debug)943 bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
944                       struct ra_regs *ra,
945                       struct util_debug_callback *debug)
946 {
947    nir_function_impl *func = nir_shader_get_entrypoint(nir);
948    ppir_compiler *comp = ppir_compiler_create(prog, func->ssa_alloc);
949    if (!comp)
950       return false;
951 
952    comp->ra = ra;
953    comp->uses_discard = nir->info.fs.uses_discard;
954    comp->dual_source_blend = nir->info.fs.color_is_dual_source;
955 
956    /* 1st pass: create ppir blocks */
957    nir_foreach_function_impl(impl, nir) {
958       nir_foreach_block(nblock, impl) {
959          ppir_block *block = ppir_block_create(comp);
960          if (!block)
961             return false;
962          block->index = nblock->index;
963          _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
964       }
965    }
966 
967    /* 2nd pass: populate successors */
968    nir_foreach_function_impl(impl, nir) {
969       nir_foreach_block(nblock, impl) {
970          ppir_block *block = ppir_get_block(comp, nblock);
971          assert(block);
972 
973          for (int i = 0; i < 2; i++) {
974             if (nblock->successors[i])
975                block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
976          }
977       }
978    }
979 
980    comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);
981 
982    /* -1 means reg is not written by the shader */
983    for (int i = 0; i < ppir_output_num; i++)
984       comp->out_type_to_reg[i] = -1;
985 
986    nir_foreach_reg_decl(decl, func) {
987       ppir_reg *r = rzalloc(comp, ppir_reg);
988       if (!r)
989          return false;
990 
991       r->index = decl->def.index;
992       r->num_components = nir_intrinsic_num_components(decl);
993       r->is_head = false;
994       list_addtail(&r->list, &comp->reg_list);
995       comp->reg_num++;
996    }
997 
998    if (!ppir_emit_cf_list(comp, &func->body))
999       goto err_out0;
1000 
1001    /* If we have discard block add it to the very end */
1002    if (comp->discard_block)
1003       list_addtail(&comp->discard_block->list, &comp->block_list);
1004 
1005    ppir_node_print_prog(comp);
1006 
1007    if (!ppir_lower_prog(comp))
1008       goto err_out0;
1009 
1010    ppir_add_ordering_deps(comp);
1011    ppir_add_write_after_read_deps(comp);
1012 
1013    ppir_node_print_prog(comp);
1014 
1015    if (!ppir_node_to_instr(comp))
1016       goto err_out0;
1017 
1018    if (!ppir_schedule_prog(comp))
1019       goto err_out0;
1020 
1021    if (!ppir_regalloc_prog(comp))
1022       goto err_out0;
1023 
1024    if (!ppir_codegen_prog(comp))
1025       goto err_out0;
1026 
1027    ppir_print_shader_db(nir, comp, debug);
1028 
1029    _mesa_hash_table_u64_destroy(comp->blocks);
1030    ralloc_free(comp);
1031    return true;
1032 
1033 err_out0:
1034    _mesa_hash_table_u64_destroy(comp->blocks);
1035    ralloc_free(comp);
1036    return false;
1037 }
1038 
1039