xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_eu.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  Copyright (C) Intel Corp.  2006.  All Rights Reserved.
3  Intel funded Tungsten Graphics to
4  develop this 3D driver.
5 
6  Permission is hereby granted, free of charge, to any person obtaining
7  a copy of this software and associated documentation files (the
8  "Software"), to deal in the Software without restriction, including
9  without limitation the rights to use, copy, modify, merge, publish,
10  distribute, sublicense, and/or sell copies of the Software, and to
11  permit persons to whom the Software is furnished to do so, subject to
12  the following conditions:
13 
14  The above copyright notice and this permission notice (including the
15  next paragraph) shall be included in all copies or substantial
16  portions of the Software.
17 
18  THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19  EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20  MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21  IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22  LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23  OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24  WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 
26  **********************************************************************/
27  /*
28   * Authors:
29   *   Keith Whitwell <[email protected]>
30   */
31 
32 #include <sys/stat.h>
33 #include <fcntl.h>
34 
35 #include "elk_disasm.h"
36 #include "elk_eu_defines.h"
37 #include "elk_eu.h"
38 #include "elk_shader.h"
39 #include "../intel_gfx_ver_enum.h"
40 #include "dev/intel_debug.h"
41 
42 #include "util/u_debug.h"
43 #include "util/ralloc.h"
44 
45 /* Returns a conditional modifier that negates the condition. */
46 enum elk_conditional_mod
elk_negate_cmod(enum elk_conditional_mod cmod)47 elk_negate_cmod(enum elk_conditional_mod cmod)
48 {
49    switch (cmod) {
50    case ELK_CONDITIONAL_Z:
51       return ELK_CONDITIONAL_NZ;
52    case ELK_CONDITIONAL_NZ:
53       return ELK_CONDITIONAL_Z;
54    case ELK_CONDITIONAL_G:
55       return ELK_CONDITIONAL_LE;
56    case ELK_CONDITIONAL_GE:
57       return ELK_CONDITIONAL_L;
58    case ELK_CONDITIONAL_L:
59       return ELK_CONDITIONAL_GE;
60    case ELK_CONDITIONAL_LE:
61       return ELK_CONDITIONAL_G;
62    default:
63       unreachable("Can't negate this cmod");
64    }
65 }
66 
67 /* Returns the corresponding conditional mod for swapping src0 and
68  * src1 in e.g. CMP.
69  */
70 enum elk_conditional_mod
elk_swap_cmod(enum elk_conditional_mod cmod)71 elk_swap_cmod(enum elk_conditional_mod cmod)
72 {
73    switch (cmod) {
74    case ELK_CONDITIONAL_Z:
75    case ELK_CONDITIONAL_NZ:
76       return cmod;
77    case ELK_CONDITIONAL_G:
78       return ELK_CONDITIONAL_L;
79    case ELK_CONDITIONAL_GE:
80       return ELK_CONDITIONAL_LE;
81    case ELK_CONDITIONAL_L:
82       return ELK_CONDITIONAL_G;
83    case ELK_CONDITIONAL_LE:
84       return ELK_CONDITIONAL_GE;
85    default:
86       return ELK_CONDITIONAL_NONE;
87    }
88 }
89 
90 /**
91  * Get the least significant bit offset of the i+1-th component of immediate
92  * type \p type.  For \p i equal to the two's complement of j, return the
93  * offset of the j-th component starting from the end of the vector.  For
94  * scalar register types return zero.
95  */
96 static unsigned
imm_shift(enum elk_reg_type type,unsigned i)97 imm_shift(enum elk_reg_type type, unsigned i)
98 {
99    assert(type != ELK_REGISTER_TYPE_UV && type != ELK_REGISTER_TYPE_V &&
100           "Not implemented.");
101 
102    if (type == ELK_REGISTER_TYPE_VF)
103       return 8 * (i & 3);
104    else
105       return 0;
106 }
107 
/**
 * Swizzle an arbitrary immediate \p x of the given type according to the
 * permutation specified as \p swz.
 */
uint32_t
elk_swizzle_immediate(enum elk_reg_type type, uint32_t x, unsigned swz)
{
   /* imm_shift(type, 1) is the per-component bit width, so it is non-zero
    * only for vector immediate types (VF here; V/UV are unimplemented).
    */
   if (imm_shift(type, 1)) {
      /* Number of components packed into the 32-bit immediate (4 for VF). */
      const unsigned n = 32 / imm_shift(type, 1);
      uint32_t y = 0;

      for (unsigned i = 0; i < n; i++) {
         /* Shift the specified component all the way to the right and left to
          * discard any undesired L/MSBs, then shift it right into component i.
          */
         y |= x >> imm_shift(type, (i & ~3) + ELK_GET_SWZ(swz, i & 3))
                << imm_shift(type, ~0u)
                >> imm_shift(type, ~0u - i);
      }

      return y;
   } else {
      /* Scalar register types have no components to permute. */
      return x;
   }
}
133 
134 unsigned
elk_get_default_exec_size(struct elk_codegen * p)135 elk_get_default_exec_size(struct elk_codegen *p)
136 {
137    return p->current->exec_size;
138 }
139 
140 unsigned
elk_get_default_group(struct elk_codegen * p)141 elk_get_default_group(struct elk_codegen *p)
142 {
143    return p->current->group;
144 }
145 
146 unsigned
elk_get_default_access_mode(struct elk_codegen * p)147 elk_get_default_access_mode(struct elk_codegen *p)
148 {
149    return p->current->access_mode;
150 }
151 
152 void
elk_set_default_exec_size(struct elk_codegen * p,unsigned value)153 elk_set_default_exec_size(struct elk_codegen *p, unsigned value)
154 {
155    p->current->exec_size = value;
156 }
157 
elk_set_default_predicate_control(struct elk_codegen * p,enum elk_predicate pc)158 void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc)
159 {
160    p->current->predicate = pc;
161 }
162 
elk_set_default_predicate_inverse(struct elk_codegen * p,bool predicate_inverse)163 void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse)
164 {
165    p->current->pred_inv = predicate_inverse;
166 }
167 
elk_set_default_flag_reg(struct elk_codegen * p,int reg,int subreg)168 void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg)
169 {
170    assert(subreg < 2);
171    p->current->flag_subreg = reg * 2 + subreg;
172 }
173 
elk_set_default_access_mode(struct elk_codegen * p,unsigned access_mode)174 void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode )
175 {
176    p->current->access_mode = access_mode;
177 }
178 
179 void
elk_set_default_compression_control(struct elk_codegen * p,enum elk_compression compression_control)180 elk_set_default_compression_control(struct elk_codegen *p,
181 			    enum elk_compression compression_control)
182 {
183    switch (compression_control) {
184    case ELK_COMPRESSION_NONE:
185       /* This is the "use the first set of bits of dmask/vmask/arf
186        * according to execsize" option.
187        */
188       p->current->group = 0;
189       break;
190    case ELK_COMPRESSION_2NDHALF:
191       /* For SIMD8, this is "use the second set of 8 bits." */
192       p->current->group = 8;
193       break;
194    case ELK_COMPRESSION_COMPRESSED:
195       /* For SIMD16 instruction compression, use the first set of 16 bits
196        * since we don't do SIMD32 dispatch.
197        */
198       p->current->group = 0;
199       break;
200    default:
201       unreachable("not reached");
202    }
203 
204    if (p->devinfo->ver <= 6) {
205       p->current->compressed =
206          (compression_control == ELK_COMPRESSION_COMPRESSED);
207    }
208 }
209 
210 /**
211  * Enable or disable instruction compression on the given instruction leaving
212  * the currently selected channel enable group untouched.
213  */
214 void
elk_inst_set_compression(const struct intel_device_info * devinfo,elk_inst * inst,bool on)215 elk_inst_set_compression(const struct intel_device_info *devinfo,
216                          elk_inst *inst, bool on)
217 {
218    if (devinfo->ver >= 6) {
219       /* No-op, the EU will figure out for us whether the instruction needs to
220        * be compressed.
221        */
222    } else {
223       /* The channel group and compression controls are non-orthogonal, there
224        * are two possible representations for uncompressed instructions and we
225        * may need to preserve the current one to avoid changing the selected
226        * channel group inadvertently.
227        */
228       if (on)
229          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_COMPRESSED);
230       else if (elk_inst_qtr_control(devinfo, inst)
231                == ELK_COMPRESSION_COMPRESSED)
232          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
233    }
234 }
235 
236 void
elk_set_default_compression(struct elk_codegen * p,bool on)237 elk_set_default_compression(struct elk_codegen *p, bool on)
238 {
239    p->current->compressed = on;
240 }
241 
242 /**
243  * Apply the range of channel enable signals given by
244  * [group, group + exec_size) to the instruction passed as argument.
245  */
246 void
elk_inst_set_group(const struct intel_device_info * devinfo,elk_inst * inst,unsigned group)247 elk_inst_set_group(const struct intel_device_info *devinfo,
248                    elk_inst *inst, unsigned group)
249 {
250    if (devinfo->ver >= 7) {
251       assert(group % 4 == 0 && group < 32);
252       elk_inst_set_qtr_control(devinfo, inst, group / 8);
253       elk_inst_set_nib_control(devinfo, inst, (group / 4) % 2);
254 
255    } else if (devinfo->ver == 6) {
256       assert(group % 8 == 0 && group < 32);
257       elk_inst_set_qtr_control(devinfo, inst, group / 8);
258 
259    } else {
260       assert(group % 8 == 0 && group < 16);
261       /* The channel group and compression controls are non-orthogonal, there
262        * are two possible representations for group zero and we may need to
263        * preserve the current one to avoid changing the selected compression
264        * enable inadvertently.
265        */
266       if (group == 8)
267          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_2NDHALF);
268       else if (elk_inst_qtr_control(devinfo, inst) == ELK_COMPRESSION_2NDHALF)
269          elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
270    }
271 }
272 
273 void
elk_set_default_group(struct elk_codegen * p,unsigned group)274 elk_set_default_group(struct elk_codegen *p, unsigned group)
275 {
276    p->current->group = group;
277 }
278 
elk_set_default_mask_control(struct elk_codegen * p,unsigned value)279 void elk_set_default_mask_control( struct elk_codegen *p, unsigned value )
280 {
281    p->current->mask_control = value;
282 }
283 
elk_set_default_saturate(struct elk_codegen * p,bool enable)284 void elk_set_default_saturate( struct elk_codegen *p, bool enable )
285 {
286    p->current->saturate = enable;
287 }
288 
elk_set_default_acc_write_control(struct elk_codegen * p,unsigned value)289 void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value)
290 {
291    p->current->acc_wr_control = value;
292 }
293 
elk_push_insn_state(struct elk_codegen * p)294 void elk_push_insn_state( struct elk_codegen *p )
295 {
296    assert(p->current != &p->stack[ELK_EU_MAX_INSN_STACK-1]);
297    *(p->current + 1) = *p->current;
298    p->current++;
299 }
300 
elk_pop_insn_state(struct elk_codegen * p)301 void elk_pop_insn_state( struct elk_codegen *p )
302 {
303    assert(p->current != p->stack);
304    p->current--;
305 }
306 
307 
308 /***********************************************************************
309  */
310 void
elk_init_codegen(const struct elk_isa_info * isa,struct elk_codegen * p,void * mem_ctx)311 elk_init_codegen(const struct elk_isa_info *isa,
312                  struct elk_codegen *p, void *mem_ctx)
313 {
314    memset(p, 0, sizeof(*p));
315 
316    p->isa = isa;
317    p->devinfo = isa->devinfo;
318    p->automatic_exec_sizes = true;
319    /*
320     * Set the initial instruction store array size to 1024, if found that
321     * isn't enough, then it will double the store size at elk_next_insn()
322     * until out of memory.
323     */
324    p->store_size = 1024;
325    p->store = rzalloc_array(mem_ctx, elk_inst, p->store_size);
326    p->nr_insn = 0;
327    p->current = p->stack;
328    memset(p->current, 0, sizeof(p->current[0]));
329 
330    p->mem_ctx = mem_ctx;
331 
332    /* Some defaults?
333     */
334    elk_set_default_exec_size(p, ELK_EXECUTE_8);
335    elk_set_default_mask_control(p, ELK_MASK_ENABLE); /* what does this do? */
336    elk_set_default_saturate(p, 0);
337    elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
338 
339    /* Set up control flow stack */
340    p->if_stack_depth = 0;
341    p->if_stack_array_size = 16;
342    p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
343 
344    p->loop_stack_depth = 0;
345    p->loop_stack_array_size = 16;
346    p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
347    p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
348 }
349 
350 
elk_get_program(struct elk_codegen * p,unsigned * sz)351 const unsigned *elk_get_program( struct elk_codegen *p,
352 			       unsigned *sz )
353 {
354    *sz = p->next_insn_offset;
355    return (const unsigned *)p->store;
356 }
357 
358 const struct elk_shader_reloc *
elk_get_shader_relocs(struct elk_codegen * p,unsigned * num_relocs)359 elk_get_shader_relocs(struct elk_codegen *p, unsigned *num_relocs)
360 {
361    *num_relocs = p->num_relocs;
362    return p->relocs;
363 }
364 
365 DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL);
366 
elk_should_dump_shader_bin(void)367 bool elk_should_dump_shader_bin(void)
368 {
369    return debug_get_option_shader_bin_dump_path() != NULL;
370 }
371 
/**
 * Write the byte range [start_offset, end_offset) of \p assembly to
 * "$INTEL_SHADER_BIN_DUMP_PATH/<identifier>.bin".
 *
 * Best-effort debug facility: failures to open, a destination that is not a
 * regular file, and failed/short writes are silently ignored.
 */
void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
                         const char *identifier)
{
   char *name = ralloc_asprintf(NULL, "%s/%s.bin",
                                debug_get_option_shader_bin_dump_path(),
                                identifier);

   int fd = open(name, O_CREAT | O_WRONLY | O_TRUNC, 0644);
   ralloc_free(name);

   if (fd < 0)
      return;

   /* Refuse to write through anything that isn't a regular file. */
   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return;
   }

   size_t to_write = end_offset - start_offset;
   /* Fix: arithmetic on a void * is a GNU extension, not standard C; do the
    * pointer arithmetic on a char * instead.
    */
   char *write_ptr = (char *)assembly + start_offset;

   /* write() may perform a partial write; loop until done or error. */
   while (to_write) {
      ssize_t ret = write(fd, write_ptr, to_write);

      if (ret <= 0) {
         close(fd);
         return;
      }

      to_write -= ret;
      write_ptr += ret;
   }

   close(fd);
}
408 
/**
 * Replace the assembly generated from \p start_offset onwards with the
 * contents of "$INTEL_SHADER_ASM_READ_PATH/<identifier>.bin", if it exists.
 *
 * Returns true on success.  NOTE(review): on a short read this returns
 * false even though the instruction store and counters have already been
 * modified -- callers presumably treat that as fatal; verify.
 */
bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
                               const char *identifier)
{
   const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
   if (!read_path) {
      return false;
   }

   char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);

   int fd = open(name, O_RDONLY);
   ralloc_free(name);

   if (fd == -1) {
      return false;
   }

   /* Only accept a regular file (not e.g. a FIFO or device node). */
   struct stat sb;
   if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
      close(fd);
      return false;
   }

   /* Drop the instructions emitted since start_offset and account for the
    * ones about to be read in.  The byte-to-instruction conversion assumes
    * full-size (uncompacted) instructions -- TODO confirm.
    */
   p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(elk_inst);
   p->nr_insn += sb.st_size / sizeof(elk_inst);

   /* Resize the store to exactly fit the overridden program. */
   p->next_insn_offset = start_offset + sb.st_size;
   p->store_size = (start_offset + sb.st_size) / sizeof(elk_inst);
   p->store = (elk_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
   assert(p->store);

   /* Read the replacement instructions directly into the store. */
   ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
   close(fd);
   if (ret != sb.st_size) {
      return false;
   }

   /* Validate the overridden code so a bad binary fails loudly in debug
    * builds.
    */
   ASSERTED bool valid =
      elk_validate_instructions(p->isa, p->store,
                                start_offset, p->next_insn_offset,
                                NULL);
   assert(valid);

   return true;
}
454 
455 const struct elk_label *
elk_find_label(const struct elk_label * root,int offset)456 elk_find_label(const struct elk_label *root, int offset)
457 {
458    const struct elk_label *curr = root;
459 
460    if (curr != NULL)
461    {
462       do {
463          if (curr->offset == offset)
464             return curr;
465 
466          curr = curr->next;
467       } while (curr != NULL);
468    }
469 
470    return curr;
471 }
472 
473 void
elk_create_label(struct elk_label ** labels,int offset,void * mem_ctx)474 elk_create_label(struct elk_label **labels, int offset, void *mem_ctx)
475 {
476    if (*labels != NULL) {
477       struct elk_label *curr = *labels;
478       struct elk_label *prev;
479 
480       do {
481          prev = curr;
482 
483          if (curr->offset == offset)
484             return;
485 
486          curr = curr->next;
487       } while (curr != NULL);
488 
489       curr = ralloc(mem_ctx, struct elk_label);
490       curr->offset = offset;
491       curr->number = prev->number + 1;
492       curr->next = NULL;
493       prev->next = curr;
494    } else {
495       struct elk_label *root = ralloc(mem_ctx, struct elk_label);
496       root->number = 0;
497       root->offset = offset;
498       root->next = NULL;
499       *labels = root;
500    }
501 }
502 
/**
 * Scan [start, end) of \p assembly and build the list of jump-target labels
 * used by the disassembler, allocated out of \p mem_ctx.
 */
const struct elk_label *
elk_label_assembly(const struct elk_isa_info *isa,
                   const void *assembly, int start, int end, void *mem_ctx)
{
   const struct intel_device_info *const devinfo = isa->devinfo;

   struct elk_label *root_label = NULL;

   /* Converts encoded jump distances to bytes (elk_jump_scale() gives the
    * per-generation encoding granularity).
    */
   int to_bytes_scale = sizeof(elk_inst) / elk_jump_scale(devinfo);

   for (int offset = start; offset < end;) {
      const elk_inst *inst = (const elk_inst *) ((const char *) assembly + offset);
      elk_inst uncompacted;

      bool is_compact = elk_inst_cmpt_control(devinfo, inst);

      if (is_compact) {
         /* The jump fields are only readable from the uncompacted form. */
         elk_compact_inst *compacted = (elk_compact_inst *)inst;
         elk_uncompact_instruction(isa, &uncompacted, compacted);
         inst = &uncompacted;
      }

      if (elk_has_uip(devinfo, elk_inst_opcode(isa, inst))) {
         /* Instructions that have UIP also have JIP. */
         elk_create_label(&root_label,
            offset + elk_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
         elk_create_label(&root_label,
            offset + elk_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
      } else if (elk_has_jip(devinfo, elk_inst_opcode(isa, inst))) {
         int jip;
         if (devinfo->ver >= 7) {
            jip = elk_inst_jip(devinfo, inst);
         } else {
            /* Pre-Gfx7 encodes the distance in the jump-count field. */
            jip = elk_inst_gfx6_jump_count(devinfo, inst);
         }

         elk_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
      }

      /* Compacted instructions are half the size of full ones. */
      if (is_compact) {
         offset += sizeof(elk_compact_inst);
      } else {
         offset += sizeof(elk_inst);
      }
   }

   return root_label;
}
551 
/** Disassemble [start, end) of \p assembly to \p out with jump labels. */
void
elk_disassemble_with_labels(const struct elk_isa_info *isa,
                            const void *assembly, int start, int end, FILE *out)
{
   /* Temporary ralloc context so the label list is freed on return. */
   void *mem_ctx = ralloc_context(NULL);

   const struct elk_label *labels =
      elk_label_assembly(isa, assembly, start, end, mem_ctx);
   elk_disassemble(isa, assembly, start, end, labels, out);

   ralloc_free(mem_ctx);
}
564 
/**
 * Disassemble [start, end) of \p assembly to \p out, printing a "LABELn:"
 * marker at every offset present in \p root_label (may be NULL).
 */
void
elk_disassemble(const struct elk_isa_info *isa,
                const void *assembly, int start, int end,
                const struct elk_label *root_label, FILE *out)
{
   const struct intel_device_info *devinfo = isa->devinfo;

   /* INTEL_DEBUG=hex additionally dumps the raw instruction bytes. */
   bool dump_hex = INTEL_DEBUG(DEBUG_HEX);

   for (int offset = start; offset < end;) {
      const elk_inst *insn = (const elk_inst *)((char *)assembly + offset);
      elk_inst uncompacted;

      /* Emit a label line if any jump targets this offset. */
      if (root_label != NULL) {
        const struct elk_label *label = elk_find_label(root_label, offset);
        if (label != NULL) {
           fprintf(out, "\nLABEL%d:\n", label->number);
        }
      }

      bool compacted = elk_inst_cmpt_control(devinfo, insn);
      /* Deliberately disabled debug toggle: flip to print byte offsets. */
      if (0)
         fprintf(out, "0x%08x: ", offset);

      if (compacted) {
         /* NOTE(review): this pointer shadows the bool 'compacted' above. */
         elk_compact_inst *compacted = (elk_compact_inst *)insn;
         if (dump_hex) {
            /* A compacted instruction is 8 bytes (vs 16 uncompacted). */
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            const unsigned int blank_spaces = 24;
            for (int i = 0 ; i < 8; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
            /* Make compacted instructions hex value output vertically aligned
             * with uncompacted instructions hex value
             */
            fprintf(out, "%*c", blank_spaces, ' ');
         }

         /* Disassembly always operates on the uncompacted encoding. */
         elk_uncompact_instruction(isa, &uncompacted, compacted);
         insn = &uncompacted;
      } else {
         if (dump_hex) {
            unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
            for (int i = 0 ; i < 16; i = i + 4) {
               fprintf(out, "%02x %02x %02x %02x ",
                       insn_ptr[i],
                       insn_ptr[i + 1],
                       insn_ptr[i + 2],
                       insn_ptr[i + 3]);
            }
         }
      }

      elk_disassemble_inst(out, isa, insn, compacted, offset, root_label);

      /* Advance by the on-disk size of the (possibly compacted) encoding. */
      if (compacted) {
         offset += sizeof(elk_compact_inst);
      } else {
         offset += sizeof(elk_inst);
      }
   }
}
631 
/* Descriptions of every opcode across the supported generations.  The same
 * IR opcode may appear more than once when its HW encoding differs between
 * generations; elk_init_isa_info() asserts while building its lookup tables
 * that the gfx_vers masks of such duplicates never overlap.
 */
static const struct elk_opcode_desc opcode_descs[] = {
   /* IR,                 HW,  name,      nsrc, ndst, gfx_vers */
   { ELK_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GFX_ALL },
   { ELK_OPCODE_MOV,      1,   "mov",     1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_MOV,      97,  "mov",     1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SEL,      2,   "sel",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEL,      98,  "sel",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_MOVI,     3,   "movi",    2,    1,    GFX_GE(GFX45) & GFX_LT(GFX12) },
   { ELK_OPCODE_MOVI,     99,  "movi",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_NOT,      4,   "not",     1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_NOT,      100, "not",     1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_AND,      5,   "and",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_AND,      101, "and",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_OR,       6,   "or",      2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_OR,       102, "or",      2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_XOR,      7,   "xor",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_XOR,      103, "xor",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SHR,      8,   "shr",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SHR,      104, "shr",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SHL,      9,   "shl",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SHL,      105, "shl",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_DIM,      10,  "dim",     1,    1,    GFX75 },
   { ELK_OPCODE_SMOV,     10,  "smov",    0,    0,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_SMOV,     106, "smov",    0,    0,    GFX_GE(GFX12) },
   { ELK_OPCODE_ASR,      12,  "asr",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_ASR,      108, "asr",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CMP,      16,  "cmp",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_CMP,      112, "cmp",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_CMPN,     113, "cmpn",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CSEL,     18,  "csel",    3,    1,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_CSEL,     114, "csel",    3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_F32TO16,  19,  "f32to16", 1,    1,    GFX7 | GFX75 },
   { ELK_OPCODE_F16TO32,  20,  "f16to32", 1,    1,    GFX7 | GFX75 },
   { ELK_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFREV,    119, "bfrev",   1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFE,      24,  "bfe",     3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFE,      120, "bfe",     3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI1,     121, "bfi1",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI2,     122, "bfi2",    3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GFX_ALL },
   { ELK_OPCODE_BRD,      33,  "brd",     0,    0,    GFX_GE(GFX7) },
   { ELK_OPCODE_IF,       34,  "if",      0,    0,    GFX_ALL },
   { ELK_OPCODE_IFF,      35,  "iff",     0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_BRC,      35,  "brc",     0,    0,    GFX_GE(GFX7) },
   { ELK_OPCODE_ELSE,     36,  "else",    0,    0,    GFX_ALL },
   { ELK_OPCODE_ENDIF,    37,  "endif",   0,    0,    GFX_ALL },
   { ELK_OPCODE_DO,       38,  "do",      0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_CASE,     38,  "case",    0,    0,    GFX6 },
   { ELK_OPCODE_WHILE,    39,  "while",   0,    0,    GFX_ALL },
   { ELK_OPCODE_BREAK,    40,  "break",   0,    0,    GFX_ALL },
   { ELK_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GFX_ALL },
   { ELK_OPCODE_HALT,     42,  "halt",    0,    0,    GFX_ALL },
   { ELK_OPCODE_CALLA,    43,  "calla",   0,    0,    GFX_GE(GFX75) },
   { ELK_OPCODE_MSAVE,    44,  "msave",   0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_CALL,     44,  "call",    0,    0,    GFX_GE(GFX6) },
   { ELK_OPCODE_MREST,    45,  "mrest",   0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_RET,      45,  "ret",     0,    0,    GFX_GE(GFX6) },
   { ELK_OPCODE_PUSH,     46,  "push",    0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_FORK,     46,  "fork",    0,    0,    GFX6 },
   { ELK_OPCODE_GOTO,     46,  "goto",    0,    0,    GFX_GE(GFX8) },
   { ELK_OPCODE_POP,      47,  "pop",     2,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_WAIT,     48,  "wait",    0,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEND,     49,  "send",    1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SENDC,    50,  "sendc",   1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEND,     49,  "send",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SENDC,    50,  "sendc",   2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_MATH,     56,  "math",    2,    1,    GFX_GE(GFX6) },
   { ELK_OPCODE_ADD,      64,  "add",     2,    1,    GFX_ALL },
   { ELK_OPCODE_MUL,      65,  "mul",     2,    1,    GFX_ALL },
   { ELK_OPCODE_AVG,      66,  "avg",     2,    1,    GFX_ALL },
   { ELK_OPCODE_FRC,      67,  "frc",     1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDU,     68,  "rndu",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDD,     69,  "rndd",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDE,     70,  "rnde",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GFX_ALL },
   { ELK_OPCODE_MAC,      72,  "mac",     2,    1,    GFX_ALL },
   { ELK_OPCODE_MACH,     73,  "mach",    2,    1,    GFX_ALL },
   { ELK_OPCODE_LZD,      74,  "lzd",     1,    1,    GFX_ALL },
   { ELK_OPCODE_FBH,      75,  "fbh",     1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_FBL,      76,  "fbl",     1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_CBIT,     77,  "cbit",    1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_ADDC,     78,  "addc",    2,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_SAD2,     80,  "sad2",    2,    1,    GFX_ALL },
   { ELK_OPCODE_SADA2,    81,  "sada2",   2,    1,    GFX_ALL },
   { ELK_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DP2,      87,  "dp2",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_LINE,     89,  "line",    2,    1,    GFX_LE(GFX10) },
   { ELK_OPCODE_PLN,      90,  "pln",     2,    1,    GFX_GE(GFX45) & GFX_LE(GFX10) },
   { ELK_OPCODE_MAD,      91,  "mad",     3,    1,    GFX_GE(GFX6) },
   { ELK_OPCODE_LRP,      92,  "lrp",     3,    1,    GFX_GE(GFX6) & GFX_LE(GFX10) },
   { ELK_OPCODE_MADM,     93,  "madm",    3,    1,    GFX_GE(GFX8) },
   { ELK_OPCODE_NENOP,    125, "nenop",   0,    0,    GFX45 },
   { ELK_OPCODE_NOP,      126, "nop",     0,    0,    GFX_LT(GFX12) },
   { ELK_OPCODE_NOP,      96,  "nop",     0,    0,    GFX_GE(GFX12) }
};
733 
734 void
elk_init_isa_info(struct elk_isa_info * isa,const struct intel_device_info * devinfo)735 elk_init_isa_info(struct elk_isa_info *isa,
736                   const struct intel_device_info *devinfo)
737 {
738    isa->devinfo = devinfo;
739 
740    enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);
741 
742    memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
743    memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));
744 
745    for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
746       if (opcode_descs[i].gfx_vers & ver) {
747          const unsigned e = opcode_descs[i].ir;
748          const unsigned h = opcode_descs[i].hw;
749          assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
750          assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
751          isa->ir_to_descs[e] = &opcode_descs[i];
752          isa->hw_to_descs[h] = &opcode_descs[i];
753       }
754    }
755 }
756 
757 /**
758  * Return the matching opcode_desc for the specified IR opcode and hardware
759  * generation, or NULL if the opcode is not supported by the device.
760  */
761 const struct elk_opcode_desc *
elk_opcode_desc(const struct elk_isa_info * isa,enum elk_opcode op)762 elk_opcode_desc(const struct elk_isa_info *isa, enum elk_opcode op)
763 {
764    return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
765 }
766 
767 /**
768  * Return the matching opcode_desc for the specified HW opcode and hardware
769  * generation, or NULL if the opcode is not supported by the device.
770  */
771 const struct elk_opcode_desc *
elk_opcode_desc_from_hw(const struct elk_isa_info * isa,unsigned hw)772 elk_opcode_desc_from_hw(const struct elk_isa_info *isa, unsigned hw)
773 {
774    return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
775 }
776 
/**
 * Return the number of sources consumed by \p inst.
 *
 * Usually this comes straight from the opcode table, but MATH instructions
 * (and pre-Gfx6 SENDs to the math shared function) encode unary vs binary
 * in the math function field instead.
 */
unsigned
elk_num_sources_from_inst(const struct elk_isa_info *isa,
                          const elk_inst *inst)
{
   const struct intel_device_info *devinfo = isa->devinfo;
   const struct elk_opcode_desc *desc =
      elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
   unsigned math_function;

   if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
      math_function = elk_inst_math_function(devinfo, inst);
   } else if (devinfo->ver < 6 &&
              elk_inst_opcode(isa, inst) == ELK_OPCODE_SEND) {
      if (elk_inst_sfid(devinfo, inst) == ELK_SFID_MATH) {
         /* src1 must be a descriptor (including the information to determine
          * that the SEND is doing an extended math operation), but src0 can
          * actually be null since it serves as the source of the implicit GRF
          * to MRF move.
          *
          * If we stop using that functionality, we'll have to revisit this.
          */
         return 2;
      } else {
         /* Send instructions are allowed to have null sources since they use
          * the base_mrf field to specify which message register source.
          */
         return 0;
      }
   } else {
      /* Ordinary instruction: the opcode table is authoritative. */
      assert(desc->nsrc < 4);
      return desc->nsrc;
   }

   /* MATH: whether the operation is unary or binary depends on the
    * specific math function.
    */
   switch (math_function) {
   case ELK_MATH_FUNCTION_INV:
   case ELK_MATH_FUNCTION_LOG:
   case ELK_MATH_FUNCTION_EXP:
   case ELK_MATH_FUNCTION_SQRT:
   case ELK_MATH_FUNCTION_RSQ:
   case ELK_MATH_FUNCTION_SIN:
   case ELK_MATH_FUNCTION_COS:
   case ELK_MATH_FUNCTION_SINCOS:
   case GFX8_MATH_FUNCTION_INVM:
   case GFX8_MATH_FUNCTION_RSQRTM:
      return 1;
   case ELK_MATH_FUNCTION_FDIV:
   case ELK_MATH_FUNCTION_POW:
   case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
   case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT:
   case ELK_MATH_FUNCTION_INT_DIV_REMAINDER:
      return 2;
   default:
      unreachable("not reached");
   }
}
832