1 /*
2 * Copyright (c) 2017 Lima Project
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sub license,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the
12 * next paragraph) shall be included in all copies or substantial portions
13 * of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21 * DEALINGS IN THE SOFTWARE.
22 *
23 */
24
25 #include <string.h>
26
27 #include "util/hash_table.h"
28 #include "util/ralloc.h"
29 #include "util/bitscan.h"
30 #include "compiler/nir/nir.h"
31 #include "pipe/p_state.h"
32
33
34 #include "ppir.h"
35
/* Create a ppir node whose destination targets the SSA def 'ssa'.
 * Returns the new node, or NULL on allocation failure. */
static void *ppir_node_create_ssa(ppir_block *block, ppir_op op, nir_def *ssa)
{
   ppir_node *new_node = ppir_node_create(block, op, ssa->index, 0);
   if (!new_node)
      return NULL;

   ppir_dest *d = ppir_node_get_dest(new_node);
   d->type = ppir_target_ssa;
   d->ssa.num_components = ssa->num_components;
   d->write_mask = u_bit_consecutive(0, ssa->num_components);

   /* Load/store nodes mark their dest as head — presumably pinning it to
    * the start of a register; confirm semantics in ppir.h. */
   bool is_load_store = new_node->type == ppir_node_type_load ||
                        new_node->type == ppir_node_type_store;
   if (is_load_store)
      d->ssa.is_head = true;

   return new_node;
}
53
/* Create a ppir node whose destination is the ppir register matching
 * 'def' (looked up by index in the compiler's reg_list). 'mask' is the
 * component write mask. Returns NULL on allocation failure. */
static void *ppir_node_create_reg(ppir_block *block, ppir_op op,
                                  nir_def *def, unsigned mask)
{
   ppir_node *n = ppir_node_create(block, op, def->index, mask);
   if (!n)
      return NULL;

   ppir_dest *d = ppir_node_get_dest(n);
   d->type = ppir_target_register;
   d->write_mask = mask;

   /* Find the register created for this index in ppir_compile_nir. */
   list_for_each_entry(ppir_reg, reg, &block->comp->reg_list, list) {
      if (reg->index == def->index) {
         d->reg = reg;
         break;
      }
   }

   /* Load/store destinations are flagged as register heads. */
   if (n->type == ppir_node_type_load ||
       n->type == ppir_node_type_store)
      d->reg->is_head = true;

   return n;
}
79
/* Create a node for 'def', choosing the destination kind automatically:
 * no def -> bare node, def backed by a NIR register store -> register
 * dest, otherwise -> SSA dest. */
static void *ppir_node_create_dest(ppir_block *block, ppir_op op,
                                   nir_def *def, unsigned mask)
{
   if (!def)
      return ppir_node_create(block, op, -1, 0);

   nir_intrinsic_instr *store = nir_store_reg_for_def(def);
   if (store)
      return ppir_node_create_reg(block, op, store->src[1].ssa,
                                  nir_intrinsic_write_mask(store));

   /* No store_reg consumer: plain SSA destination. */
   return ppir_node_create_ssa(block, op, def);
}
93
/* Resolve the NIR source 'ns' to the ppir node(s) that produce it, add a
 * src dependency from 'node' to each producer, and point 'ps' at the
 * producer's destination.
 *
 * 'mask' selects which components of the source are actually read; for
 * register-backed sources each read component (after applying ps->swizzle)
 * may come from a different producer node.
 */
static void ppir_node_add_src(ppir_compiler *comp, ppir_node *node,
                              ppir_src *ps, nir_src *ns, unsigned mask)
{
   ppir_node *child = NULL;
   nir_intrinsic_instr *load = nir_load_reg_for_def(ns->ssa);

   if (!load) { /* is ssa */
      /* SSA defs map 1:1 to nodes via their index in var_nodes. */
      child = comp->var_nodes[ns->ssa->index];
      /* Undefs carry no value, so no dependency edge is needed. */
      if (child->op != ppir_op_undef)
         ppir_node_add_dep(node, child, ppir_dep_src);
   }
   else {
      /* Register source: var_nodes holds one slot per component at
       * (reg_index * 4 + component). Walk the read components. */
      nir_def *rs = load->src[0].ssa;
      while (mask) {
         /* u_bit_scan pops the lowest set bit from 'mask'. */
         int swizzle = ps->swizzle[u_bit_scan(&mask)];
         child = comp->var_nodes[(rs->index << 2) + swizzle];
         /* Reg is read before it was written, create a dummy node for it */
         if (!child) {
            child = ppir_node_create_reg(node->block, ppir_op_dummy, rs,
                                         u_bit_consecutive(0, 4));
            comp->var_nodes[(rs->index << 2) + swizzle] = child;
         }
         /* Don't add dummies or recursive deps for ops like r1 = r1 + ssa1 */
         if (child && node != child && child->op != ppir_op_dummy)
            ppir_node_add_dep(node, child, ppir_dep_src);
      }
   }

   assert(child);
   /* Bind 'ps' to the (last) producer's destination. */
   ppir_node_target_assign(ps, child);
}
125
/* Map from NIR ALU opcode to the corresponding ppir opcode. Unlisted
 * entries are zero-initialized, which ppir_emit_alu treats as
 * ppir_op_unsupported and rejects. */
static int nir_to_ppir_opcodes[nir_num_opcodes] = {
   [nir_op_mov] = ppir_op_mov,
   [nir_op_fmul] = ppir_op_mul,
   [nir_op_fabs] = ppir_op_abs,
   [nir_op_fneg] = ppir_op_neg,
   [nir_op_fadd] = ppir_op_add,
   [nir_op_fsum3] = ppir_op_sum3,
   [nir_op_fsum4] = ppir_op_sum4,
   [nir_op_frsq] = ppir_op_rsqrt,
   [nir_op_flog2] = ppir_op_log2,
   [nir_op_fexp2] = ppir_op_exp2,
   [nir_op_fsqrt] = ppir_op_sqrt,
   [nir_op_fsin] = ppir_op_sin,
   [nir_op_fcos] = ppir_op_cos,
   [nir_op_fmax] = ppir_op_max,
   [nir_op_fmin] = ppir_op_min,
   [nir_op_frcp] = ppir_op_rcp,
   [nir_op_ffloor] = ppir_op_floor,
   [nir_op_fceil] = ppir_op_ceil,
   [nir_op_ffract] = ppir_op_fract,
   [nir_op_sge] = ppir_op_ge,
   [nir_op_slt] = ppir_op_lt,
   [nir_op_seq] = ppir_op_eq,
   [nir_op_sne] = ppir_op_ne,
   [nir_op_fcsel] = ppir_op_select,
   [nir_op_inot] = ppir_op_not,
   [nir_op_ftrunc] = ppir_op_trunc,
   [nir_op_fsat] = ppir_op_sat,
   [nir_op_fclamp_pos_mali] = ppir_op_clamp_pos,
};
156
ppir_emit_alu(ppir_block * block,nir_instr * ni)157 static bool ppir_emit_alu(ppir_block *block, nir_instr *ni)
158 {
159 nir_alu_instr *instr = nir_instr_as_alu(ni);
160 nir_def *def = &instr->def;
161 int op = nir_to_ppir_opcodes[instr->op];
162
163 if (op == ppir_op_unsupported) {
164 ppir_error("unsupported nir_op: %s\n", nir_op_infos[instr->op].name);
165 return false;
166 }
167 unsigned mask = nir_component_mask(def->num_components);
168 ppir_alu_node *node = ppir_node_create_dest(block, op, def, mask);
169 if (!node)
170 return false;
171
172 ppir_dest *pd = &node->dest;
173
174 unsigned src_mask;
175 switch (op) {
176 case ppir_op_sum3:
177 src_mask = 0b0111;
178 break;
179 case ppir_op_sum4:
180 src_mask = 0b1111;
181 break;
182 default:
183 src_mask = pd->write_mask;
184 break;
185 }
186
187 unsigned num_child = nir_op_infos[instr->op].num_inputs;
188 node->num_src = num_child;
189
190 for (int i = 0; i < num_child; i++) {
191 nir_alu_src *alu_src = instr->src + i;
192 ppir_src *ps = node->src + i;
193 memcpy(ps->swizzle, alu_src->swizzle, sizeof(ps->swizzle));
194 ppir_node_add_src(block->comp, &node->node, ps, &alu_src->src, src_mask);
195 }
196
197 list_addtail(&node->node.list, &block->node_list);
198 return true;
199 }
200
ppir_emit_derivative(ppir_block * block,nir_instr * ni,int op)201 static bool ppir_emit_derivative(ppir_block *block, nir_instr *ni, int op)
202 {
203 assert(op == ppir_op_ddx || op == ppir_op_ddy);
204
205 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
206 nir_def *def = &instr->def;
207
208 unsigned mask = nir_component_mask(def->num_components);
209 ppir_alu_node *node = ppir_node_create_dest(block, op, def, mask);
210 if (!node)
211 return false;
212
213 ppir_dest *pd = &node->dest;
214 unsigned src_mask = pd->write_mask;
215 uint8_t identity[4] = { PIPE_SWIZZLE_X, PIPE_SWIZZLE_Y,
216 PIPE_SWIZZLE_Z, PIPE_SWIZZLE_W };
217
218 node->num_src = 1;
219 nir_src *intr_src = instr->src;
220 ppir_src *ps = node->src;
221 memcpy(ps->swizzle, identity, sizeof(identity));
222 ppir_node_add_src(block->comp, &node->node, ps, intr_src, src_mask);
223
224 list_addtail(&node->node.list, &block->node_list);
225 return true;
226 }
227
228 static ppir_block *ppir_block_create(ppir_compiler *comp);
229
ppir_emit_discard_block(ppir_compiler * comp)230 static bool ppir_emit_discard_block(ppir_compiler *comp)
231 {
232 ppir_block *block = ppir_block_create(comp);
233 ppir_discard_node *discard;
234 if (!block)
235 return false;
236
237 comp->discard_block = block;
238 block->comp = comp;
239
240 discard = ppir_node_create(block, ppir_op_discard, -1, 0);
241 if (discard)
242 list_addtail(&discard->node.list, &block->node_list);
243 else
244 return false;
245
246 return true;
247 }
248
ppir_emit_discard_if(ppir_block * block,nir_instr * ni)249 static ppir_node *ppir_emit_discard_if(ppir_block *block, nir_instr *ni)
250 {
251 nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
252 ppir_node *node;
253 ppir_compiler *comp = block->comp;
254 ppir_branch_node *branch;
255
256 if (!comp->discard_block && !ppir_emit_discard_block(comp))
257 return NULL;
258
259 node = ppir_node_create(block, ppir_op_branch, -1, 0);
260 if (!node)
261 return NULL;
262 branch = ppir_node_to_branch(node);
263
264 /* second src and condition will be updated during lowering */
265 ppir_node_add_src(block->comp, node, &branch->src[0],
266 &instr->src[0], u_bit_consecutive(0, instr->num_components));
267 branch->num_src = 1;
268 branch->target = comp->discard_block;
269
270 return node;
271 }
272
ppir_emit_discard(ppir_block * block,nir_instr * ni)273 static ppir_node *ppir_emit_discard(ppir_block *block, nir_instr *ni)
274 {
275 ppir_node *node = ppir_node_create(block, ppir_op_discard, -1, 0);
276
277 return node;
278 }
279
/* Translate a NIR intrinsic instruction into ppir nodes.
 * Returns false on unsupported intrinsics or allocation failure. */
static bool ppir_emit_intrinsic(ppir_block *block, nir_instr *ni)
{
   ppir_node *node;
   nir_intrinsic_instr *instr = nir_instr_as_intrinsic(ni);
   unsigned mask = 0;
   ppir_load_node *lnode;
   ppir_alu_node *alu_node;

   switch (instr->intrinsic) {
   case nir_intrinsic_decl_reg:
   case nir_intrinsic_store_reg:
      /* Nothing to do for these */
      return true;

   case nir_intrinsic_load_reg: {
      /* Placeholder (dummy) node; 'mask' is still 0 at this point.
       * NOTE(review): result is not NULL-checked — allocation failure is
       * silently ignored here; confirm that is intentional. */
      lnode = ppir_node_create_dest(block, ppir_op_dummy, &instr->def, mask);
      return true;
   }

   case nir_intrinsic_load_input: {
      mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_varying, &instr->def, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      /* Varying index is in units of components: 4 per vec4 slot. */
      lnode->index = nir_intrinsic_base(instr) * 4 + nir_intrinsic_component(instr);
      /* NOTE(review): constant offset is read as a float — presumably
       * lima's lowering stores it that way; confirm against the pass
       * that produces these offsets. */
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)(nir_src_as_float(instr->src[0]) * 4);
      else {
         /* Indirect offset becomes the load's single source. */
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }
      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_load_frag_coord:
   case nir_intrinsic_load_point_coord:
   case nir_intrinsic_load_front_face: {
      mask = u_bit_consecutive(0, instr->num_components);

      /* Pick the ppir load op matching the system value. */
      ppir_op op;
      switch (instr->intrinsic) {
      case nir_intrinsic_load_frag_coord:
         op = ppir_op_load_fragcoord;
         break;
      case nir_intrinsic_load_point_coord:
         op = ppir_op_load_pointcoord;
         break;
      case nir_intrinsic_load_front_face:
         op = ppir_op_load_frontface;
         break;
      default:
         unreachable("bad intrinsic");
         break;
      }

      lnode = ppir_node_create_dest(block, op, &instr->def, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_load_uniform: {
      mask = u_bit_consecutive(0, instr->num_components);

      lnode = ppir_node_create_dest(block, ppir_op_load_uniform, &instr->def, mask);
      if (!lnode)
         return false;

      lnode->num_components = instr->num_components;
      lnode->index = nir_intrinsic_base(instr);
      /* Constant offsets fold into the index; indirect ones become the
       * load's single source (same pattern as load_input above). */
      if (nir_src_is_const(instr->src[0]))
         lnode->index += (uint32_t)nir_src_as_float(instr->src[0]);
      else {
         lnode->num_src = 1;
         ppir_node_add_src(block->comp, &lnode->node, &lnode->src, instr->src, 1);
      }

      list_addtail(&lnode->node.list, &block->node_list);
      return true;
   }

   case nir_intrinsic_store_output: {
      /* In simple cases where the store_output is ssa, that register
       * can be directly marked as the output.
       * If discard is used or the source is not ssa, things can get a
       * lot more complicated, so don't try to optimize those and fall
       * back to inserting a mov at the end.
       * If the source node will only be able to output to pipeline
       * registers, fall back to the mov as well. */
      assert(nir_src_is_const(instr->src[1]) &&
             "lima doesn't support indirect outputs");

      nir_io_semantics io = nir_intrinsic_io_semantics(instr);
      unsigned offset = nir_src_as_uint(instr->src[1]);
      unsigned slot = io.location + offset;
      ppir_output_type out_type = ppir_nir_output_to_ppir(slot,
         block->comp->dual_source_blend ? io.dual_source_blend_index : 0);
      if (out_type == ppir_output_invalid) {
         ppir_debug("Unsupported output type: %d\n", slot);
         return false;
      }

      if (!block->comp->uses_discard) {
         /* Try to mark the producing node itself as the shader output. */
         node = block->comp->var_nodes[instr->src->ssa->index];
         assert(node);
         switch (node->op) {
         case ppir_op_load_uniform:
         case ppir_op_load_texture:
         case ppir_op_dummy:
         case ppir_op_const:
            /* These can't be the output directly; use the mov below. */
            break;
         default: {
            ppir_dest *dest = ppir_node_get_dest(node);
            dest->ssa.out_type = out_type;
            dest->ssa.num_components = 4;
            dest->write_mask = u_bit_consecutive(0, 4);
            node->is_out = 1;
            return true;
         }
         }
      }

      /* Fallback: emit a mov whose destination is the output. */
      alu_node = ppir_node_create_dest(block, ppir_op_mov, NULL, 0);
      if (!alu_node)
         return false;

      ppir_dest *dest = ppir_node_get_dest(&alu_node->node);
      dest->type = ppir_target_ssa;
      dest->ssa.num_components = 4;
      dest->ssa.index = 0;
      dest->write_mask = u_bit_consecutive(0, 4);
      dest->ssa.out_type = out_type;

      alu_node->num_src = 1;

      /* Identity swizzle over the stored components. */
      for (int i = 0; i < instr->num_components; i++)
         alu_node->src[0].swizzle[i] = i;

      ppir_node_add_src(block->comp, &alu_node->node, alu_node->src, instr->src,
                        u_bit_consecutive(0, 4));

      alu_node->node.is_out = 1;

      list_addtail(&alu_node->node.list, &block->node_list);
      return true;
   }

   /* NOTE(review): the two terminate cases below do not NULL-check the
    * returned node before list_addtail; allocation failure would
    * dereference NULL — confirm whether a check is wanted. */
   case nir_intrinsic_terminate:
      node = ppir_emit_discard(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_terminate_if:
      node = ppir_emit_discard_if(block, ni);
      list_addtail(&node->list, &block->node_list);
      return true;

   case nir_intrinsic_ddx:
      return ppir_emit_derivative(block, ni, ppir_op_ddx);
   case nir_intrinsic_ddy:
      return ppir_emit_derivative(block, ni, ppir_op_ddy);

   default:
      ppir_error("unsupported nir_intrinsic_instr %s\n",
                 nir_intrinsic_infos[instr->intrinsic].name);
      return false;
   }
}
455
ppir_emit_load_const(ppir_block * block,nir_instr * ni)456 static bool ppir_emit_load_const(ppir_block *block, nir_instr *ni)
457 {
458 nir_load_const_instr *instr = nir_instr_as_load_const(ni);
459 ppir_const_node *node = ppir_node_create_ssa(block, ppir_op_const, &instr->def);
460 if (!node)
461 return false;
462
463 assert(instr->def.bit_size == 32);
464
465 for (int i = 0; i < instr->def.num_components; i++)
466 node->constant.value[i].i = instr->value[i].i32;
467 node->constant.num = instr->def.num_components;
468
469 list_addtail(&node->node.list, &block->node_list);
470 return true;
471 }
472
ppir_emit_ssa_undef(ppir_block * block,nir_instr * ni)473 static bool ppir_emit_ssa_undef(ppir_block *block, nir_instr *ni)
474 {
475 nir_undef_instr *undef = nir_instr_as_undef(ni);
476 ppir_node *node = ppir_node_create_ssa(block, ppir_op_undef, &undef->def);
477 if (!node)
478 return false;
479 ppir_alu_node *alu = ppir_node_to_alu(node);
480
481 ppir_dest *dest = &alu->dest;
482 dest->ssa.undef = true;
483
484 list_addtail(&node->list, &block->node_list);
485 return true;
486 }
487
/* Translate a NIR texture instruction into a ppir ld_tex node plus a
 * load_coords node feeding it through a pipeline register.
 *
 * Only tex/txb/txl with common sampler dims are supported. Returns false
 * on unsupported ops/dims or allocation failure.
 */
static bool ppir_emit_tex(ppir_block *block, nir_instr *ni)
{
   nir_tex_instr *instr = nir_instr_as_tex(ni);
   ppir_load_texture_node *node;

   switch (instr->op) {
   case nir_texop_tex:
   case nir_texop_txb:
   case nir_texop_txl:
      break;
   default:
      ppir_error("unsupported texop %d\n", instr->op);
      return false;
   }

   switch (instr->sampler_dim) {
   case GLSL_SAMPLER_DIM_1D:
   case GLSL_SAMPLER_DIM_2D:
   case GLSL_SAMPLER_DIM_3D:
   case GLSL_SAMPLER_DIM_CUBE:
   case GLSL_SAMPLER_DIM_RECT:
   case GLSL_SAMPLER_DIM_EXTERNAL:
      break;
   default:
      ppir_error("unsupported sampler dim: %d\n", instr->sampler_dim);
      return false;
   }

   /* emit ld_tex node */

   unsigned mask = 0;
   mask = u_bit_consecutive(0, nir_tex_instr_dest_size(instr));

   node = ppir_node_create_dest(block, ppir_op_load_texture, &instr->def, mask);
   if (!node)
      return false;

   node->sampler = instr->texture_index;
   node->sampler_dim = instr->sampler_dim;

   /* Identity swizzle for the coordinate source. */
   for (int i = 0; i < instr->coord_components; i++)
      node->src[0].swizzle[i] = i;

   /* backend1 coords carry a perspective divide (see below). */
   bool perspective = false;

   /* Wire up the texture sources: coords to src[0], lod/bias to src[1]. */
   for (int i = 0; i < instr->num_srcs; i++) {
      switch (instr->src[i].src_type) {
      case nir_tex_src_backend1:
         perspective = true;
         FALLTHROUGH;
      case nir_tex_src_coord: {
         nir_src *ns = &instr->src[i].src;
         ppir_node *child = block->comp->var_nodes[ns->ssa->index];
         if (child->op == ppir_op_load_varying) {
            /* If the successor is load_texture, promote it to load_coords */
            nir_tex_src *nts = (nir_tex_src *)ns;
            if (nts->src_type == nir_tex_src_coord ||
                nts->src_type == nir_tex_src_backend1)
               child->op = ppir_op_load_coords;
         }

         /* src[0] is not used by the ld_tex instruction but ensures
          * correct scheduling due to the pipeline dependency */
         ppir_node_add_src(block->comp, &node->node, &node->src[0], &instr->src[i].src,
                           u_bit_consecutive(0, instr->coord_components));
         node->num_src++;
         break;
      }
      case nir_tex_src_bias:
      case nir_tex_src_lod:
         node->lod_bias_en = true;
         node->explicit_lod = (instr->src[i].src_type == nir_tex_src_lod);
         ppir_node_add_src(block->comp, &node->node, &node->src[1], &instr->src[i].src, 1);
         node->num_src++;
         break;
      default:
         ppir_error("unsupported texture source type\n");
         return false;
      }
   }

   list_addtail(&node->node.list, &block->node_list);

   /* validate load coords node */

   ppir_node *src_coords = ppir_node_get_src(&node->node, 0)->node;
   ppir_load_node *load = NULL;

   /* Reuse the producer as the coords loader only if it is already a
    * load_coords feeding this ld_tex exclusively. */
   if (src_coords && ppir_node_has_single_src_succ(src_coords) &&
       (src_coords->op == ppir_op_load_coords))
      load = ppir_node_to_load(src_coords);
   else {
      /* Create load_coords node */
      load = ppir_node_create(block, ppir_op_load_coords_reg, -1, 0);
      if (!load)
         return false;
      list_addtail(&load->node.list, &block->node_list);

      load->src = node->src[0];
      load->num_src = 1;
      load->num_components = instr->coord_components;

      ppir_debug("%s create load_coords node %d for %d\n",
                 __func__, load->index, node->node.index);

      /* Move the ld_tex's predecessor deps onto the new load_coords so
       * it sits between the coord producers and the ld_tex. */
      ppir_node_foreach_pred_safe((&node->node), dep) {
         ppir_node *pred = dep->pred;
         ppir_node_remove_dep(dep);
         ppir_node_add_dep(&load->node, pred, ppir_dep_src);
      }
      ppir_node_add_dep(&node->node, &load->node, ppir_dep_src);
   }

   assert(load);

   /* For backend1 coords: divide by z (3 components) or w otherwise. */
   if (perspective) {
      if (instr->coord_components == 3)
         load->perspective = ppir_perspective_z;
      else
         load->perspective = ppir_perspective_w;
   }

   load->sampler_dim = instr->sampler_dim;
   /* Route coords through the pipeline register rather than a real reg. */
   node->src[0].type = load->dest.type = ppir_target_pipeline;
   node->src[0].pipeline = load->dest.pipeline = ppir_pipeline_reg_discard;

   return true;
}
616
ppir_get_block(ppir_compiler * comp,nir_block * nblock)617 static ppir_block *ppir_get_block(ppir_compiler *comp, nir_block *nblock)
618 {
619 ppir_block *block = _mesa_hash_table_u64_search(comp->blocks, (uintptr_t)nblock);
620
621 return block;
622 }
623
ppir_emit_jump(ppir_block * block,nir_instr * ni)624 static bool ppir_emit_jump(ppir_block *block, nir_instr *ni)
625 {
626 ppir_node *node;
627 ppir_compiler *comp = block->comp;
628 ppir_branch_node *branch;
629 ppir_block *jump_block;
630 nir_jump_instr *jump = nir_instr_as_jump(ni);
631
632 switch (jump->type) {
633 case nir_jump_break: {
634 assert(comp->current_block->successors[0]);
635 assert(!comp->current_block->successors[1]);
636 jump_block = comp->current_block->successors[0];
637 }
638 break;
639 case nir_jump_continue:
640 jump_block = comp->loop_cont_block;
641 break;
642 default:
643 ppir_error("nir_jump_instr not support\n");
644 return false;
645 }
646
647 assert(jump_block != NULL);
648
649 node = ppir_node_create(block, ppir_op_branch, -1, 0);
650 if (!node)
651 return false;
652 branch = ppir_node_to_branch(node);
653
654 /* Unconditional */
655 branch->num_src = 0;
656 branch->target = jump_block;
657
658 list_addtail(&node->list, &block->node_list);
659 return true;
660 }
661
/* Per-instruction-type emit handlers, indexed by nir_instr_type. Sized to
 * exclude phis and later types; ppir_emit_block asserts they never occur. */
static bool (*ppir_emit_instr[nir_instr_type_phi])(ppir_block *, nir_instr *) = {
   [nir_instr_type_alu]        = ppir_emit_alu,
   [nir_instr_type_intrinsic]  = ppir_emit_intrinsic,
   [nir_instr_type_load_const] = ppir_emit_load_const,
   [nir_instr_type_undef]      = ppir_emit_ssa_undef,
   [nir_instr_type_tex]        = ppir_emit_tex,
   [nir_instr_type_jump]       = ppir_emit_jump,
};
670
ppir_block_create(ppir_compiler * comp)671 static ppir_block *ppir_block_create(ppir_compiler *comp)
672 {
673 ppir_block *block = rzalloc(comp, ppir_block);
674 if (!block)
675 return NULL;
676
677 list_inithead(&block->node_list);
678 list_inithead(&block->instr_list);
679
680 block->comp = comp;
681
682 return block;
683 }
684
ppir_emit_block(ppir_compiler * comp,nir_block * nblock)685 static bool ppir_emit_block(ppir_compiler *comp, nir_block *nblock)
686 {
687 ppir_block *block = ppir_get_block(comp, nblock);
688
689 comp->current_block = block;
690
691 list_addtail(&block->list, &comp->block_list);
692
693 nir_foreach_instr(instr, nblock) {
694 assert(instr->type < nir_instr_type_phi);
695 if (!ppir_emit_instr[instr->type](block, instr))
696 return false;
697 }
698
699 return true;
700 }
701
702 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list);
703
/* Emit a NIR if as ppir branches: a negated conditional branch to the
 * else block, the then body, an unconditional branch over the else body,
 * then the else body. Returns false on failure. */
static bool ppir_emit_if(ppir_compiler *comp, nir_if *if_stmt)
{
   ppir_node *node;
   ppir_branch_node *else_branch, *after_branch;
   nir_block *nir_else_block = nir_if_first_else_block(if_stmt);
   /* The else side is empty when it is a single instruction-less block. */
   bool empty_else_block =
      (nir_else_block == nir_if_last_else_block(if_stmt) &&
       exec_list_is_empty(&nir_else_block->instr_list));
   ppir_block *block = comp->current_block;

   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   else_branch = ppir_node_to_branch(node);
   ppir_node_add_src(block->comp, node, &else_branch->src[0],
                     &if_stmt->condition, 1);
   else_branch->num_src = 1;
   /* Negate condition to minimize branching. We're generating following:
    * current_block: { ...; if (!statement) branch else_block; }
    * then_block: { ...; branch after_block; }
    * else_block: { ... }
    * after_block: { ... }
    *
    * or if else list is empty:
    * block: { if (!statement) branch else_block; }
    * then_block: { ... }
    * else_block: after_block: { ... }
    */
   else_branch->negate = true;
   list_addtail(&else_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->then_list))
      return false;

   if (empty_else_block) {
      /* No else body: branch straight to the block after the if. */
      nir_block *nblock = nir_if_last_else_block(if_stmt);
      assert(nblock->successors[0]);
      assert(!nblock->successors[1]);
      else_branch->target = ppir_get_block(comp, nblock->successors[0]);
      /* Add empty else block to the list */
      list_addtail(&block->successors[1]->list, &comp->block_list);
      return true;
   }

   else_branch->target = ppir_get_block(comp, nir_if_first_else_block(if_stmt));

   /* Terminate the then side with an unconditional jump past the else. */
   nir_block *last_then_block = nir_if_last_then_block(if_stmt);
   assert(last_then_block->successors[0]);
   assert(!last_then_block->successors[1]);
   block = ppir_get_block(comp, last_then_block);
   node = ppir_node_create(block, ppir_op_branch, -1, 0);
   if (!node)
      return false;
   after_branch = ppir_node_to_branch(node);
   /* Unconditional */
   after_branch->num_src = 0;
   after_branch->target = ppir_get_block(comp, last_then_block->successors[0]);
   /* Target should be after_block, will fixup later */
   list_addtail(&after_branch->node.list, &block->node_list);

   if (!ppir_emit_cf_list(comp, &if_stmt->else_list))
      return false;

   return true;
}
769
ppir_emit_loop(ppir_compiler * comp,nir_loop * nloop)770 static bool ppir_emit_loop(ppir_compiler *comp, nir_loop *nloop)
771 {
772 assert(!nir_loop_has_continue_construct(nloop));
773 ppir_block *save_loop_cont_block = comp->loop_cont_block;
774 ppir_block *block;
775 ppir_branch_node *loop_branch;
776 nir_block *loop_last_block;
777 ppir_node *node;
778
779 comp->loop_cont_block = ppir_get_block(comp, nir_loop_first_block(nloop));
780
781 if (!ppir_emit_cf_list(comp, &nloop->body))
782 return false;
783
784 loop_last_block = nir_loop_last_block(nloop);
785 block = ppir_get_block(comp, loop_last_block);
786 node = ppir_node_create(block, ppir_op_branch, -1, 0);
787 if (!node)
788 return false;
789 loop_branch = ppir_node_to_branch(node);
790 /* Unconditional */
791 loop_branch->num_src = 0;
792 loop_branch->target = comp->loop_cont_block;
793 list_addtail(&loop_branch->node.list, &block->node_list);
794
795 comp->loop_cont_block = save_loop_cont_block;
796
797 comp->num_loops++;
798
799 return true;
800 }
801
ppir_emit_function(ppir_compiler * comp,nir_function_impl * nfunc)802 static bool ppir_emit_function(ppir_compiler *comp, nir_function_impl *nfunc)
803 {
804 ppir_error("function nir_cf_node not support\n");
805 return false;
806 }
807
ppir_emit_cf_list(ppir_compiler * comp,struct exec_list * list)808 static bool ppir_emit_cf_list(ppir_compiler *comp, struct exec_list *list)
809 {
810 foreach_list_typed(nir_cf_node, node, node, list) {
811 bool ret;
812
813 switch (node->type) {
814 case nir_cf_node_block:
815 ret = ppir_emit_block(comp, nir_cf_node_as_block(node));
816 break;
817 case nir_cf_node_if:
818 ret = ppir_emit_if(comp, nir_cf_node_as_if(node));
819 break;
820 case nir_cf_node_loop:
821 ret = ppir_emit_loop(comp, nir_cf_node_as_loop(node));
822 break;
823 case nir_cf_node_function:
824 ret = ppir_emit_function(comp, nir_cf_node_as_function(node));
825 break;
826 default:
827 ppir_error("unknown NIR node type %d\n", node->type);
828 return false;
829 }
830
831 if (!ret)
832 return false;
833 }
834
835 return true;
836 }
837
ppir_compiler_create(void * prog,unsigned num_ssa)838 static ppir_compiler *ppir_compiler_create(void *prog, unsigned num_ssa)
839 {
840 ppir_compiler *comp = rzalloc_size(
841 prog, sizeof(*comp) + (num_ssa << 2) * sizeof(ppir_node *));
842 if (!comp)
843 return NULL;
844
845 list_inithead(&comp->block_list);
846 list_inithead(&comp->reg_list);
847 comp->reg_num = 0;
848 comp->blocks = _mesa_hash_table_u64_create(prog);
849
850 comp->var_nodes = (ppir_node **)(comp + 1);
851 comp->prog = prog;
852
853 return comp;
854 }
855
static void ppir_add_ordering_deps(ppir_compiler *comp)
{
   /* Some intrinsics do not have explicit dependencies and thus depend
    * on instructions order. Consider discard_if and the is_end node as
    * example. If we don't add fake dependency of discard_if to is_end,
    * scheduler may put the is_end first and since is_end terminates
    * shader on Utgard PP, rest of it will never be executed.
    * Add fake dependencies for discard/branch/store to preserve
    * instruction order.
    *
    * TODO: scheduler should schedule discard_if as early as possible otherwise
    * we may end up with suboptimal code for cases like this:
    *
    * s3 = s1 < s2
    * discard_if s3
    * s4 = s1 + s2
    * store s4
    *
    * In this case store depends on discard_if and s4, but since dependencies can
    * be scheduled in any order it can result in code like this:
    *
    * instr1: s3 = s1 < s3
    * instr2: s4 = s1 + s2
    * instr3: discard_if s3
    * instr4: store s4
    */
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      ppir_node *anchor = NULL;
      list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
         /* Chain every root node (except consts) to the most recently
          * seen ordering-sensitive node below it. */
         if (anchor && ppir_node_is_root(node) && node->op != ppir_op_const)
            ppir_node_add_dep(anchor, node, ppir_dep_sequence);

         bool is_ordering_sensitive = node->is_out ||
                                      node->op == ppir_op_discard ||
                                      node->op == ppir_op_store_temp ||
                                      node->op == ppir_op_branch;
         if (is_ordering_sensitive)
            anchor = node;
      }
   }
}
897
/* Report shader-db statistics (instruction count, loops, spills/fills)
 * via stderr (when LIMA_DEBUG_SHADERDB is set) and the debug callback. */
static void ppir_print_shader_db(struct nir_shader *nir, ppir_compiler *comp,
                                 struct util_debug_callback *debug)
{
   const struct shader_info *info = &nir->info;
   char *shaderdb;
   int ret = asprintf(&shaderdb,
                      "%s shader: %d inst, %d loops, %d:%d spills:fills\n",
                      gl_shader_stage_name(info->stage),
                      comp->cur_instr_index,
                      comp->num_loops,
                      comp->num_spills,
                      comp->num_fills);
   assert(ret >= 0);
   /* On asprintf failure 'shaderdb' is indeterminate; the assert above is
    * compiled out under NDEBUG, so bail explicitly instead of printing or
    * freeing garbage in release builds. */
   if (ret < 0)
      return;

   if (lima_debug & LIMA_DEBUG_SHADERDB)
      fprintf(stderr, "SHADER-DB: %s\n", shaderdb);

   util_debug_message(debug, SHADER_INFO, "%s", shaderdb);
   free(shaderdb);
}
918
/* For every register, walk each block backwards and make the most
 * recently seen write depend on every earlier read of that register, so
 * the scheduler cannot hoist the write above the reads. */
static void ppir_add_write_after_read_deps(ppir_compiler *comp)
{
   list_for_each_entry(ppir_block, block, &comp->block_list, list) {
      list_for_each_entry(ppir_reg, reg, &comp->reg_list, list) {
         ppir_node *write = NULL;
         list_for_each_entry_rev(ppir_node, node, &block->node_list, list) {
            int num_src = ppir_node_get_src_num(node);
            for (int i = 0; i < num_src; i++) {
               ppir_src *src = ppir_node_get_src(node, i);
               bool reads_reg = src && src->type == ppir_target_register &&
                                src->reg == reg;
               if (reads_reg && write) {
                  ppir_debug("Adding dep %d for write %d\n",
                             node->index, write->index);
                  ppir_node_add_dep(write, node, ppir_dep_write_after_read);
               }
            }
            ppir_dest *dest = ppir_node_get_dest(node);
            if (dest && dest->type == ppir_target_register &&
                dest->reg == reg)
               write = node;
         }
      }
   }
}
942
/* Compile a NIR fragment shader to lima PP code.
 *
 * prog:  output container; also the ralloc parent of the compiler.
 * nir:   shader to compile (fragment stage).
 * ra:    shared register-allocation state.
 * debug: callback used for shader-db reporting.
 *
 * Returns true on success. The compiler and its block table are always
 * destroyed before returning.
 */
bool ppir_compile_nir(struct lima_fs_compiled_shader *prog, struct nir_shader *nir,
                      struct ra_regs *ra,
                      struct util_debug_callback *debug)
{
   nir_function_impl *func = nir_shader_get_entrypoint(nir);
   ppir_compiler *comp = ppir_compiler_create(prog, func->ssa_alloc);
   if (!comp)
      return false;

   comp->ra = ra;
   comp->uses_discard = nir->info.fs.uses_discard;
   comp->dual_source_blend = nir->info.fs.color_is_dual_source;

   /* 1st pass: create ppir blocks */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(nblock, impl) {
         ppir_block *block = ppir_block_create(comp);
         if (!block)
            return false;
         block->index = nblock->index;
         _mesa_hash_table_u64_insert(comp->blocks, (uintptr_t)nblock, block);
      }
   }

   /* 2nd pass: populate successors */
   nir_foreach_function_impl(impl, nir) {
      nir_foreach_block(nblock, impl) {
         ppir_block *block = ppir_get_block(comp, nblock);
         assert(block);

         for (int i = 0; i < 2; i++) {
            if (nblock->successors[i])
               block->successors[i] = ppir_get_block(comp, nblock->successors[i]);
         }
      }
   }

   comp->out_type_to_reg = rzalloc_size(comp, sizeof(int) * ppir_output_num);

   /* -1 means reg is not written by the shader */
   for (int i = 0; i < ppir_output_num; i++)
      comp->out_type_to_reg[i] = -1;

   /* Mirror NIR register declarations as ppir regs. */
   nir_foreach_reg_decl(decl, func) {
      ppir_reg *r = rzalloc(comp, ppir_reg);
      if (!r)
         return false;

      r->index = decl->def.index;
      r->num_components = nir_intrinsic_num_components(decl);
      r->is_head = false;
      list_addtail(&r->list, &comp->reg_list);
      comp->reg_num++;
   }

   /* Translate instructions, then run the backend pipeline:
    * lower -> ordering deps -> node-to-instr -> schedule -> regalloc
    * -> codegen. */
   if (!ppir_emit_cf_list(comp, &func->body))
      goto err_out0;

   /* If we have discard block add it to the very end */
   if (comp->discard_block)
      list_addtail(&comp->discard_block->list, &comp->block_list);

   ppir_node_print_prog(comp);

   if (!ppir_lower_prog(comp))
      goto err_out0;

   ppir_add_ordering_deps(comp);
   ppir_add_write_after_read_deps(comp);

   ppir_node_print_prog(comp);

   if (!ppir_node_to_instr(comp))
      goto err_out0;

   if (!ppir_schedule_prog(comp))
      goto err_out0;

   if (!ppir_regalloc_prog(comp))
      goto err_out0;

   if (!ppir_codegen_prog(comp))
      goto err_out0;

   ppir_print_shader_db(nir, comp, debug);

   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return true;

err_out0:
   _mesa_hash_table_u64_destroy(comp->blocks);
   ralloc_free(comp);
   return false;
}
1038
1039