1 /*
2 Copyright (C) Intel Corp. 2006. All Rights Reserved.
3 Intel funded Tungsten Graphics to
4 develop this 3D driver.
5
6 Permission is hereby granted, free of charge, to any person obtaining
7 a copy of this software and associated documentation files (the
8 "Software"), to deal in the Software without restriction, including
9 without limitation the rights to use, copy, modify, merge, publish,
10 distribute, sublicense, and/or sell copies of the Software, and to
11 permit persons to whom the Software is furnished to do so, subject to
12 the following conditions:
13
14 The above copyright notice and this permission notice (including the
15 next paragraph) shall be included in all copies or substantial
16 portions of the Software.
17
18 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
19 EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
21 IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
22 LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
23 OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
24 WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
26 **********************************************************************/
27 /*
28 * Authors:
29 * Keith Whitwell <[email protected]>
30 */
31
32 #include <sys/stat.h>
33 #include <fcntl.h>
34
35 #include "elk_disasm.h"
36 #include "elk_eu_defines.h"
37 #include "elk_eu.h"
38 #include "elk_shader.h"
39 #include "../intel_gfx_ver_enum.h"
40 #include "dev/intel_debug.h"
41
42 #include "util/u_debug.h"
43 #include "util/ralloc.h"
44
45 /* Returns a conditional modifier that negates the condition. */
46 enum elk_conditional_mod
elk_negate_cmod(enum elk_conditional_mod cmod)47 elk_negate_cmod(enum elk_conditional_mod cmod)
48 {
49 switch (cmod) {
50 case ELK_CONDITIONAL_Z:
51 return ELK_CONDITIONAL_NZ;
52 case ELK_CONDITIONAL_NZ:
53 return ELK_CONDITIONAL_Z;
54 case ELK_CONDITIONAL_G:
55 return ELK_CONDITIONAL_LE;
56 case ELK_CONDITIONAL_GE:
57 return ELK_CONDITIONAL_L;
58 case ELK_CONDITIONAL_L:
59 return ELK_CONDITIONAL_GE;
60 case ELK_CONDITIONAL_LE:
61 return ELK_CONDITIONAL_G;
62 default:
63 unreachable("Can't negate this cmod");
64 }
65 }
66
67 /* Returns the corresponding conditional mod for swapping src0 and
68 * src1 in e.g. CMP.
69 */
70 enum elk_conditional_mod
elk_swap_cmod(enum elk_conditional_mod cmod)71 elk_swap_cmod(enum elk_conditional_mod cmod)
72 {
73 switch (cmod) {
74 case ELK_CONDITIONAL_Z:
75 case ELK_CONDITIONAL_NZ:
76 return cmod;
77 case ELK_CONDITIONAL_G:
78 return ELK_CONDITIONAL_L;
79 case ELK_CONDITIONAL_GE:
80 return ELK_CONDITIONAL_LE;
81 case ELK_CONDITIONAL_L:
82 return ELK_CONDITIONAL_G;
83 case ELK_CONDITIONAL_LE:
84 return ELK_CONDITIONAL_GE;
85 default:
86 return ELK_CONDITIONAL_NONE;
87 }
88 }
89
90 /**
91 * Get the least significant bit offset of the i+1-th component of immediate
92 * type \p type. For \p i equal to the two's complement of j, return the
93 * offset of the j-th component starting from the end of the vector. For
94 * scalar register types return zero.
95 */
96 static unsigned
imm_shift(enum elk_reg_type type,unsigned i)97 imm_shift(enum elk_reg_type type, unsigned i)
98 {
99 assert(type != ELK_REGISTER_TYPE_UV && type != ELK_REGISTER_TYPE_V &&
100 "Not implemented.");
101
102 if (type == ELK_REGISTER_TYPE_VF)
103 return 8 * (i & 3);
104 else
105 return 0;
106 }
107
108 /**
109 * Swizzle an arbitrary immediate \p x of the given type according to the
110 * permutation specified as \p swz.
111 */
112 uint32_t
elk_swizzle_immediate(enum elk_reg_type type,uint32_t x,unsigned swz)113 elk_swizzle_immediate(enum elk_reg_type type, uint32_t x, unsigned swz)
114 {
115 if (imm_shift(type, 1)) {
116 const unsigned n = 32 / imm_shift(type, 1);
117 uint32_t y = 0;
118
119 for (unsigned i = 0; i < n; i++) {
120 /* Shift the specified component all the way to the right and left to
121 * discard any undesired L/MSBs, then shift it right into component i.
122 */
123 y |= x >> imm_shift(type, (i & ~3) + ELK_GET_SWZ(swz, i & 3))
124 << imm_shift(type, ~0u)
125 >> imm_shift(type, ~0u - i);
126 }
127
128 return y;
129 } else {
130 return x;
131 }
132 }
133
134 unsigned
elk_get_default_exec_size(struct elk_codegen * p)135 elk_get_default_exec_size(struct elk_codegen *p)
136 {
137 return p->current->exec_size;
138 }
139
140 unsigned
elk_get_default_group(struct elk_codegen * p)141 elk_get_default_group(struct elk_codegen *p)
142 {
143 return p->current->group;
144 }
145
146 unsigned
elk_get_default_access_mode(struct elk_codegen * p)147 elk_get_default_access_mode(struct elk_codegen *p)
148 {
149 return p->current->access_mode;
150 }
151
152 void
elk_set_default_exec_size(struct elk_codegen * p,unsigned value)153 elk_set_default_exec_size(struct elk_codegen *p, unsigned value)
154 {
155 p->current->exec_size = value;
156 }
157
elk_set_default_predicate_control(struct elk_codegen * p,enum elk_predicate pc)158 void elk_set_default_predicate_control(struct elk_codegen *p, enum elk_predicate pc)
159 {
160 p->current->predicate = pc;
161 }
162
elk_set_default_predicate_inverse(struct elk_codegen * p,bool predicate_inverse)163 void elk_set_default_predicate_inverse(struct elk_codegen *p, bool predicate_inverse)
164 {
165 p->current->pred_inv = predicate_inverse;
166 }
167
elk_set_default_flag_reg(struct elk_codegen * p,int reg,int subreg)168 void elk_set_default_flag_reg(struct elk_codegen *p, int reg, int subreg)
169 {
170 assert(subreg < 2);
171 p->current->flag_subreg = reg * 2 + subreg;
172 }
173
elk_set_default_access_mode(struct elk_codegen * p,unsigned access_mode)174 void elk_set_default_access_mode( struct elk_codegen *p, unsigned access_mode )
175 {
176 p->current->access_mode = access_mode;
177 }
178
179 void
elk_set_default_compression_control(struct elk_codegen * p,enum elk_compression compression_control)180 elk_set_default_compression_control(struct elk_codegen *p,
181 enum elk_compression compression_control)
182 {
183 switch (compression_control) {
184 case ELK_COMPRESSION_NONE:
185 /* This is the "use the first set of bits of dmask/vmask/arf
186 * according to execsize" option.
187 */
188 p->current->group = 0;
189 break;
190 case ELK_COMPRESSION_2NDHALF:
191 /* For SIMD8, this is "use the second set of 8 bits." */
192 p->current->group = 8;
193 break;
194 case ELK_COMPRESSION_COMPRESSED:
195 /* For SIMD16 instruction compression, use the first set of 16 bits
196 * since we don't do SIMD32 dispatch.
197 */
198 p->current->group = 0;
199 break;
200 default:
201 unreachable("not reached");
202 }
203
204 if (p->devinfo->ver <= 6) {
205 p->current->compressed =
206 (compression_control == ELK_COMPRESSION_COMPRESSED);
207 }
208 }
209
210 /**
211 * Enable or disable instruction compression on the given instruction leaving
212 * the currently selected channel enable group untouched.
213 */
214 void
elk_inst_set_compression(const struct intel_device_info * devinfo,elk_inst * inst,bool on)215 elk_inst_set_compression(const struct intel_device_info *devinfo,
216 elk_inst *inst, bool on)
217 {
218 if (devinfo->ver >= 6) {
219 /* No-op, the EU will figure out for us whether the instruction needs to
220 * be compressed.
221 */
222 } else {
223 /* The channel group and compression controls are non-orthogonal, there
224 * are two possible representations for uncompressed instructions and we
225 * may need to preserve the current one to avoid changing the selected
226 * channel group inadvertently.
227 */
228 if (on)
229 elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_COMPRESSED);
230 else if (elk_inst_qtr_control(devinfo, inst)
231 == ELK_COMPRESSION_COMPRESSED)
232 elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
233 }
234 }
235
236 void
elk_set_default_compression(struct elk_codegen * p,bool on)237 elk_set_default_compression(struct elk_codegen *p, bool on)
238 {
239 p->current->compressed = on;
240 }
241
242 /**
243 * Apply the range of channel enable signals given by
244 * [group, group + exec_size) to the instruction passed as argument.
245 */
246 void
elk_inst_set_group(const struct intel_device_info * devinfo,elk_inst * inst,unsigned group)247 elk_inst_set_group(const struct intel_device_info *devinfo,
248 elk_inst *inst, unsigned group)
249 {
250 if (devinfo->ver >= 7) {
251 assert(group % 4 == 0 && group < 32);
252 elk_inst_set_qtr_control(devinfo, inst, group / 8);
253 elk_inst_set_nib_control(devinfo, inst, (group / 4) % 2);
254
255 } else if (devinfo->ver == 6) {
256 assert(group % 8 == 0 && group < 32);
257 elk_inst_set_qtr_control(devinfo, inst, group / 8);
258
259 } else {
260 assert(group % 8 == 0 && group < 16);
261 /* The channel group and compression controls are non-orthogonal, there
262 * are two possible representations for group zero and we may need to
263 * preserve the current one to avoid changing the selected compression
264 * enable inadvertently.
265 */
266 if (group == 8)
267 elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_2NDHALF);
268 else if (elk_inst_qtr_control(devinfo, inst) == ELK_COMPRESSION_2NDHALF)
269 elk_inst_set_qtr_control(devinfo, inst, ELK_COMPRESSION_NONE);
270 }
271 }
272
273 void
elk_set_default_group(struct elk_codegen * p,unsigned group)274 elk_set_default_group(struct elk_codegen *p, unsigned group)
275 {
276 p->current->group = group;
277 }
278
elk_set_default_mask_control(struct elk_codegen * p,unsigned value)279 void elk_set_default_mask_control( struct elk_codegen *p, unsigned value )
280 {
281 p->current->mask_control = value;
282 }
283
elk_set_default_saturate(struct elk_codegen * p,bool enable)284 void elk_set_default_saturate( struct elk_codegen *p, bool enable )
285 {
286 p->current->saturate = enable;
287 }
288
elk_set_default_acc_write_control(struct elk_codegen * p,unsigned value)289 void elk_set_default_acc_write_control(struct elk_codegen *p, unsigned value)
290 {
291 p->current->acc_wr_control = value;
292 }
293
elk_push_insn_state(struct elk_codegen * p)294 void elk_push_insn_state( struct elk_codegen *p )
295 {
296 assert(p->current != &p->stack[ELK_EU_MAX_INSN_STACK-1]);
297 *(p->current + 1) = *p->current;
298 p->current++;
299 }
300
elk_pop_insn_state(struct elk_codegen * p)301 void elk_pop_insn_state( struct elk_codegen *p )
302 {
303 assert(p->current != p->stack);
304 p->current--;
305 }
306
307
308 /***********************************************************************
309 */
310 void
elk_init_codegen(const struct elk_isa_info * isa,struct elk_codegen * p,void * mem_ctx)311 elk_init_codegen(const struct elk_isa_info *isa,
312 struct elk_codegen *p, void *mem_ctx)
313 {
314 memset(p, 0, sizeof(*p));
315
316 p->isa = isa;
317 p->devinfo = isa->devinfo;
318 p->automatic_exec_sizes = true;
319 /*
320 * Set the initial instruction store array size to 1024, if found that
321 * isn't enough, then it will double the store size at elk_next_insn()
322 * until out of memory.
323 */
324 p->store_size = 1024;
325 p->store = rzalloc_array(mem_ctx, elk_inst, p->store_size);
326 p->nr_insn = 0;
327 p->current = p->stack;
328 memset(p->current, 0, sizeof(p->current[0]));
329
330 p->mem_ctx = mem_ctx;
331
332 /* Some defaults?
333 */
334 elk_set_default_exec_size(p, ELK_EXECUTE_8);
335 elk_set_default_mask_control(p, ELK_MASK_ENABLE); /* what does this do? */
336 elk_set_default_saturate(p, 0);
337 elk_set_default_compression_control(p, ELK_COMPRESSION_NONE);
338
339 /* Set up control flow stack */
340 p->if_stack_depth = 0;
341 p->if_stack_array_size = 16;
342 p->if_stack = rzalloc_array(mem_ctx, int, p->if_stack_array_size);
343
344 p->loop_stack_depth = 0;
345 p->loop_stack_array_size = 16;
346 p->loop_stack = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
347 p->if_depth_in_loop = rzalloc_array(mem_ctx, int, p->loop_stack_array_size);
348 }
349
350
elk_get_program(struct elk_codegen * p,unsigned * sz)351 const unsigned *elk_get_program( struct elk_codegen *p,
352 unsigned *sz )
353 {
354 *sz = p->next_insn_offset;
355 return (const unsigned *)p->store;
356 }
357
358 const struct elk_shader_reloc *
elk_get_shader_relocs(struct elk_codegen * p,unsigned * num_relocs)359 elk_get_shader_relocs(struct elk_codegen *p, unsigned *num_relocs)
360 {
361 *num_relocs = p->num_relocs;
362 return p->relocs;
363 }
364
365 DEBUG_GET_ONCE_OPTION(shader_bin_dump_path, "INTEL_SHADER_BIN_DUMP_PATH", NULL);
366
elk_should_dump_shader_bin(void)367 bool elk_should_dump_shader_bin(void)
368 {
369 return debug_get_option_shader_bin_dump_path() != NULL;
370 }
371
elk_dump_shader_bin(void * assembly,int start_offset,int end_offset,const char * identifier)372 void elk_dump_shader_bin(void *assembly, int start_offset, int end_offset,
373 const char *identifier)
374 {
375 char *name = ralloc_asprintf(NULL, "%s/%s.bin",
376 debug_get_option_shader_bin_dump_path(),
377 identifier);
378
379 int fd = open(name, O_CREAT | O_WRONLY | O_TRUNC, 0644);
380 ralloc_free(name);
381
382 if (fd < 0)
383 return;
384
385 struct stat sb;
386 if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
387 close(fd);
388 return;
389 }
390
391 size_t to_write = end_offset - start_offset;
392 void *write_ptr = assembly + start_offset;
393
394 while (to_write) {
395 ssize_t ret = write(fd, write_ptr, to_write);
396
397 if (ret <= 0) {
398 close(fd);
399 return;
400 }
401
402 to_write -= ret;
403 write_ptr += ret;
404 }
405
406 close(fd);
407 }
408
elk_try_override_assembly(struct elk_codegen * p,int start_offset,const char * identifier)409 bool elk_try_override_assembly(struct elk_codegen *p, int start_offset,
410 const char *identifier)
411 {
412 const char *read_path = getenv("INTEL_SHADER_ASM_READ_PATH");
413 if (!read_path) {
414 return false;
415 }
416
417 char *name = ralloc_asprintf(NULL, "%s/%s.bin", read_path, identifier);
418
419 int fd = open(name, O_RDONLY);
420 ralloc_free(name);
421
422 if (fd == -1) {
423 return false;
424 }
425
426 struct stat sb;
427 if (fstat(fd, &sb) != 0 || (!S_ISREG(sb.st_mode))) {
428 close(fd);
429 return false;
430 }
431
432 p->nr_insn -= (p->next_insn_offset - start_offset) / sizeof(elk_inst);
433 p->nr_insn += sb.st_size / sizeof(elk_inst);
434
435 p->next_insn_offset = start_offset + sb.st_size;
436 p->store_size = (start_offset + sb.st_size) / sizeof(elk_inst);
437 p->store = (elk_inst *)reralloc_size(p->mem_ctx, p->store, p->next_insn_offset);
438 assert(p->store);
439
440 ssize_t ret = read(fd, (char *)p->store + start_offset, sb.st_size);
441 close(fd);
442 if (ret != sb.st_size) {
443 return false;
444 }
445
446 ASSERTED bool valid =
447 elk_validate_instructions(p->isa, p->store,
448 start_offset, p->next_insn_offset,
449 NULL);
450 assert(valid);
451
452 return true;
453 }
454
455 const struct elk_label *
elk_find_label(const struct elk_label * root,int offset)456 elk_find_label(const struct elk_label *root, int offset)
457 {
458 const struct elk_label *curr = root;
459
460 if (curr != NULL)
461 {
462 do {
463 if (curr->offset == offset)
464 return curr;
465
466 curr = curr->next;
467 } while (curr != NULL);
468 }
469
470 return curr;
471 }
472
473 void
elk_create_label(struct elk_label ** labels,int offset,void * mem_ctx)474 elk_create_label(struct elk_label **labels, int offset, void *mem_ctx)
475 {
476 if (*labels != NULL) {
477 struct elk_label *curr = *labels;
478 struct elk_label *prev;
479
480 do {
481 prev = curr;
482
483 if (curr->offset == offset)
484 return;
485
486 curr = curr->next;
487 } while (curr != NULL);
488
489 curr = ralloc(mem_ctx, struct elk_label);
490 curr->offset = offset;
491 curr->number = prev->number + 1;
492 curr->next = NULL;
493 prev->next = curr;
494 } else {
495 struct elk_label *root = ralloc(mem_ctx, struct elk_label);
496 root->number = 0;
497 root->offset = offset;
498 root->next = NULL;
499 *labels = root;
500 }
501 }
502
/** Scan the instructions in [start, end) of \p assembly and build the list
 * of branch-target labels (allocated out of \p mem_ctx).  Returns NULL when
 * no instruction has a jump target.
 */
const struct elk_label *
elk_label_assembly(const struct elk_isa_info *isa,
                   const void *assembly, int start, int end, void *mem_ctx)
{
   const struct intel_device_info *const devinfo = isa->devinfo;

   struct elk_label *root_label = NULL;

   /* JIP/UIP are encoded in units of elk_jump_scale(); convert to bytes
    * before adding them to the instruction's byte offset.
    */
   int to_bytes_scale = sizeof(elk_inst) / elk_jump_scale(devinfo);

   for (int offset = start; offset < end;) {
      const elk_inst *inst = (const elk_inst *) ((const char *) assembly + offset);
      elk_inst uncompacted;

      bool is_compact = elk_inst_cmpt_control(devinfo, inst);

      if (is_compact) {
         /* The branch fields live in the full-size encoding, so expand
          * compacted instructions first.
          */
         elk_compact_inst *compacted = (elk_compact_inst *)inst;
         elk_uncompact_instruction(isa, &uncompacted, compacted);
         inst = &uncompacted;
      }

      if (elk_has_uip(devinfo, elk_inst_opcode(isa, inst))) {
         /* Instructions that have UIP also have JIP. */
         elk_create_label(&root_label,
            offset + elk_inst_uip(devinfo, inst) * to_bytes_scale, mem_ctx);
         elk_create_label(&root_label,
            offset + elk_inst_jip(devinfo, inst) * to_bytes_scale, mem_ctx);
      } else if (elk_has_jip(devinfo, elk_inst_opcode(isa, inst))) {
         int jip;
         if (devinfo->ver >= 7) {
            jip = elk_inst_jip(devinfo, inst);
         } else {
            /* Gfx6 and earlier encode the distance as a jump count. */
            jip = elk_inst_gfx6_jump_count(devinfo, inst);
         }

         elk_create_label(&root_label, offset + jip * to_bytes_scale, mem_ctx);
      }

      /* Compact instructions are half the size of full ones. */
      if (is_compact) {
         offset += sizeof(elk_compact_inst);
      } else {
         offset += sizeof(elk_inst);
      }
   }

   return root_label;
}
551
/** Disassemble [start, end) of \p assembly to \p out, first collecting the
 * branch-target labels so jumps can be printed symbolically.
 */
void
elk_disassemble_with_labels(const struct elk_isa_info *isa,
                            const void *assembly, int start, int end, FILE *out)
{
   /* Keep the temporary label list on its own ralloc context so it can be
    * freed wholesale when the disassembly is done.
    */
   void *mem_ctx = ralloc_context(NULL);

   const struct elk_label *root_label =
      elk_label_assembly(isa, assembly, start, end, mem_ctx);
   elk_disassemble(isa, assembly, start, end, root_label, out);

   ralloc_free(mem_ctx);
}
564
565 void
elk_disassemble(const struct elk_isa_info * isa,const void * assembly,int start,int end,const struct elk_label * root_label,FILE * out)566 elk_disassemble(const struct elk_isa_info *isa,
567 const void *assembly, int start, int end,
568 const struct elk_label *root_label, FILE *out)
569 {
570 const struct intel_device_info *devinfo = isa->devinfo;
571
572 bool dump_hex = INTEL_DEBUG(DEBUG_HEX);
573
574 for (int offset = start; offset < end;) {
575 const elk_inst *insn = (const elk_inst *)((char *)assembly + offset);
576 elk_inst uncompacted;
577
578 if (root_label != NULL) {
579 const struct elk_label *label = elk_find_label(root_label, offset);
580 if (label != NULL) {
581 fprintf(out, "\nLABEL%d:\n", label->number);
582 }
583 }
584
585 bool compacted = elk_inst_cmpt_control(devinfo, insn);
586 if (0)
587 fprintf(out, "0x%08x: ", offset);
588
589 if (compacted) {
590 elk_compact_inst *compacted = (elk_compact_inst *)insn;
591 if (dump_hex) {
592 unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
593 const unsigned int blank_spaces = 24;
594 for (int i = 0 ; i < 8; i = i + 4) {
595 fprintf(out, "%02x %02x %02x %02x ",
596 insn_ptr[i],
597 insn_ptr[i + 1],
598 insn_ptr[i + 2],
599 insn_ptr[i + 3]);
600 }
601 /* Make compacted instructions hex value output vertically aligned
602 * with uncompacted instructions hex value
603 */
604 fprintf(out, "%*c", blank_spaces, ' ');
605 }
606
607 elk_uncompact_instruction(isa, &uncompacted, compacted);
608 insn = &uncompacted;
609 } else {
610 if (dump_hex) {
611 unsigned char * insn_ptr = ((unsigned char *)&insn[0]);
612 for (int i = 0 ; i < 16; i = i + 4) {
613 fprintf(out, "%02x %02x %02x %02x ",
614 insn_ptr[i],
615 insn_ptr[i + 1],
616 insn_ptr[i + 2],
617 insn_ptr[i + 3]);
618 }
619 }
620 }
621
622 elk_disassemble_inst(out, isa, insn, compacted, offset, root_label);
623
624 if (compacted) {
625 offset += sizeof(elk_compact_inst);
626 } else {
627 offset += sizeof(elk_inst);
628 }
629 }
630 }
631
/* Master opcode table.  Several IR opcodes appear twice because the HW
 * encoding changed in Gfx12; elk_init_isa_info() filters by gfx_vers so at
 * most one row per opcode applies to any given device.
 */
static const struct elk_opcode_desc opcode_descs[] = {
   /* IR,                 HW,  name,      nsrc, ndst, gfx_vers */
   { ELK_OPCODE_ILLEGAL,  0,   "illegal", 0,    0,    GFX_ALL },
   { ELK_OPCODE_MOV,      1,   "mov",     1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_MOV,      97,  "mov",     1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SEL,      2,   "sel",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEL,      98,  "sel",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_MOVI,     3,   "movi",    2,    1,    GFX_GE(GFX45) & GFX_LT(GFX12) },
   { ELK_OPCODE_MOVI,     99,  "movi",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_NOT,      4,   "not",     1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_NOT,      100, "not",     1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_AND,      5,   "and",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_AND,      101, "and",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_OR,       6,   "or",      2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_OR,       102, "or",      2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_XOR,      7,   "xor",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_XOR,      103, "xor",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SHR,      8,   "shr",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SHR,      104, "shr",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SHL,      9,   "shl",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SHL,      105, "shl",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_DIM,      10,  "dim",     1,    1,    GFX75 },
   { ELK_OPCODE_SMOV,     10,  "smov",    0,    0,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_SMOV,     106, "smov",    0,    0,    GFX_GE(GFX12) },
   { ELK_OPCODE_ASR,      12,  "asr",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_ASR,      108, "asr",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CMP,      16,  "cmp",     2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_CMP,      112, "cmp",     2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CMPN,     17,  "cmpn",    2,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_CMPN,     113, "cmpn",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_CSEL,     18,  "csel",    3,    1,    GFX_GE(GFX8) & GFX_LT(GFX12) },
   { ELK_OPCODE_CSEL,     114, "csel",    3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_F32TO16,  19,  "f32to16", 1,    1,    GFX7 | GFX75 },
   { ELK_OPCODE_F16TO32,  20,  "f16to32", 1,    1,    GFX7 | GFX75 },
   { ELK_OPCODE_BFREV,    23,  "bfrev",   1,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFREV,    119, "bfrev",   1,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFE,      24,  "bfe",     3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFE,      120, "bfe",     3,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFI1,     25,  "bfi1",    2,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI1,     121, "bfi1",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_BFI2,     26,  "bfi2",    3,    1,    GFX_GE(GFX7) & GFX_LT(GFX12) },
   { ELK_OPCODE_BFI2,     122, "bfi2",    3,    1,    GFX_GE(GFX12) },
   /* Flow control.  Note several HW encodings are reused across gens for
    * different opcodes (e.g. 35 is IFF on <=Gfx5 but BRC on >=Gfx7).
    */
   { ELK_OPCODE_JMPI,     32,  "jmpi",    0,    0,    GFX_ALL },
   { ELK_OPCODE_BRD,      33,  "brd",     0,    0,    GFX_GE(GFX7) },
   { ELK_OPCODE_IF,       34,  "if",      0,    0,    GFX_ALL },
   { ELK_OPCODE_IFF,      35,  "iff",     0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_BRC,      35,  "brc",     0,    0,    GFX_GE(GFX7) },
   { ELK_OPCODE_ELSE,     36,  "else",    0,    0,    GFX_ALL },
   { ELK_OPCODE_ENDIF,    37,  "endif",   0,    0,    GFX_ALL },
   { ELK_OPCODE_DO,       38,  "do",      0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_CASE,     38,  "case",    0,    0,    GFX6 },
   { ELK_OPCODE_WHILE,    39,  "while",   0,    0,    GFX_ALL },
   { ELK_OPCODE_BREAK,    40,  "break",   0,    0,    GFX_ALL },
   { ELK_OPCODE_CONTINUE, 41,  "cont",    0,    0,    GFX_ALL },
   { ELK_OPCODE_HALT,     42,  "halt",    0,    0,    GFX_ALL },
   { ELK_OPCODE_CALLA,    43,  "calla",   0,    0,    GFX_GE(GFX75) },
   { ELK_OPCODE_MSAVE,    44,  "msave",   0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_CALL,     44,  "call",    0,    0,    GFX_GE(GFX6) },
   { ELK_OPCODE_MREST,    45,  "mrest",   0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_RET,      45,  "ret",     0,    0,    GFX_GE(GFX6) },
   { ELK_OPCODE_PUSH,     46,  "push",    0,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_FORK,     46,  "fork",    0,    0,    GFX6 },
   { ELK_OPCODE_GOTO,     46,  "goto",    0,    0,    GFX_GE(GFX8) },
   { ELK_OPCODE_POP,      47,  "pop",     2,    0,    GFX_LE(GFX5) },
   { ELK_OPCODE_WAIT,     48,  "wait",    0,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEND,     49,  "send",    1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SENDC,    50,  "sendc",   1,    1,    GFX_LT(GFX12) },
   { ELK_OPCODE_SEND,     49,  "send",    2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_SENDC,    50,  "sendc",   2,    1,    GFX_GE(GFX12) },
   { ELK_OPCODE_MATH,     56,  "math",    2,    1,    GFX_GE(GFX6) },
   /* Arithmetic. */
   { ELK_OPCODE_ADD,      64,  "add",     2,    1,    GFX_ALL },
   { ELK_OPCODE_MUL,      65,  "mul",     2,    1,    GFX_ALL },
   { ELK_OPCODE_AVG,      66,  "avg",     2,    1,    GFX_ALL },
   { ELK_OPCODE_FRC,      67,  "frc",     1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDU,     68,  "rndu",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDD,     69,  "rndd",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDE,     70,  "rnde",    1,    1,    GFX_ALL },
   { ELK_OPCODE_RNDZ,     71,  "rndz",    1,    1,    GFX_ALL },
   { ELK_OPCODE_MAC,      72,  "mac",     2,    1,    GFX_ALL },
   { ELK_OPCODE_MACH,     73,  "mach",    2,    1,    GFX_ALL },
   { ELK_OPCODE_LZD,      74,  "lzd",     1,    1,    GFX_ALL },
   { ELK_OPCODE_FBH,      75,  "fbh",     1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_FBL,      76,  "fbl",     1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_CBIT,     77,  "cbit",    1,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_ADDC,     78,  "addc",    2,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_SUBB,     79,  "subb",    2,    1,    GFX_GE(GFX7) },
   { ELK_OPCODE_SAD2,     80,  "sad2",    2,    1,    GFX_ALL },
   { ELK_OPCODE_SADA2,    81,  "sada2",   2,    1,    GFX_ALL },
   { ELK_OPCODE_DP4,      84,  "dp4",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DPH,      85,  "dph",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DP3,      86,  "dp3",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_DP2,      87,  "dp2",     2,    1,    GFX_LT(GFX11) },
   { ELK_OPCODE_LINE,     89,  "line",    2,    1,    GFX_LE(GFX10) },
   { ELK_OPCODE_PLN,      90,  "pln",     2,    1,    GFX_GE(GFX45) & GFX_LE(GFX10) },
   { ELK_OPCODE_MAD,      91,  "mad",     3,    1,    GFX_GE(GFX6) },
   { ELK_OPCODE_LRP,      92,  "lrp",     3,    1,    GFX_GE(GFX6) & GFX_LE(GFX10) },
   { ELK_OPCODE_MADM,     93,  "madm",    3,    1,    GFX_GE(GFX8) },
   { ELK_OPCODE_NENOP,    125, "nenop",   0,    0,    GFX45 },
   { ELK_OPCODE_NOP,      126, "nop",     0,    0,    GFX_LT(GFX12) },
   { ELK_OPCODE_NOP,      96,  "nop",     0,    0,    GFX_GE(GFX12) }
};
733
734 void
elk_init_isa_info(struct elk_isa_info * isa,const struct intel_device_info * devinfo)735 elk_init_isa_info(struct elk_isa_info *isa,
736 const struct intel_device_info *devinfo)
737 {
738 isa->devinfo = devinfo;
739
740 enum gfx_ver ver = gfx_ver_from_devinfo(devinfo);
741
742 memset(isa->ir_to_descs, 0, sizeof(isa->ir_to_descs));
743 memset(isa->hw_to_descs, 0, sizeof(isa->hw_to_descs));
744
745 for (unsigned i = 0; i < ARRAY_SIZE(opcode_descs); i++) {
746 if (opcode_descs[i].gfx_vers & ver) {
747 const unsigned e = opcode_descs[i].ir;
748 const unsigned h = opcode_descs[i].hw;
749 assert(e < ARRAY_SIZE(isa->ir_to_descs) && !isa->ir_to_descs[e]);
750 assert(h < ARRAY_SIZE(isa->hw_to_descs) && !isa->hw_to_descs[h]);
751 isa->ir_to_descs[e] = &opcode_descs[i];
752 isa->hw_to_descs[h] = &opcode_descs[i];
753 }
754 }
755 }
756
757 /**
758 * Return the matching opcode_desc for the specified IR opcode and hardware
759 * generation, or NULL if the opcode is not supported by the device.
760 */
761 const struct elk_opcode_desc *
elk_opcode_desc(const struct elk_isa_info * isa,enum elk_opcode op)762 elk_opcode_desc(const struct elk_isa_info *isa, enum elk_opcode op)
763 {
764 return op < ARRAY_SIZE(isa->ir_to_descs) ? isa->ir_to_descs[op] : NULL;
765 }
766
767 /**
768 * Return the matching opcode_desc for the specified HW opcode and hardware
769 * generation, or NULL if the opcode is not supported by the device.
770 */
771 const struct elk_opcode_desc *
elk_opcode_desc_from_hw(const struct elk_isa_info * isa,unsigned hw)772 elk_opcode_desc_from_hw(const struct elk_isa_info *isa, unsigned hw)
773 {
774 return hw < ARRAY_SIZE(isa->hw_to_descs) ? isa->hw_to_descs[hw] : NULL;
775 }
776
777 unsigned
elk_num_sources_from_inst(const struct elk_isa_info * isa,const elk_inst * inst)778 elk_num_sources_from_inst(const struct elk_isa_info *isa,
779 const elk_inst *inst)
780 {
781 const struct intel_device_info *devinfo = isa->devinfo;
782 const struct elk_opcode_desc *desc =
783 elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
784 unsigned math_function;
785
786 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
787 math_function = elk_inst_math_function(devinfo, inst);
788 } else if (devinfo->ver < 6 &&
789 elk_inst_opcode(isa, inst) == ELK_OPCODE_SEND) {
790 if (elk_inst_sfid(devinfo, inst) == ELK_SFID_MATH) {
791 /* src1 must be a descriptor (including the information to determine
792 * that the SEND is doing an extended math operation), but src0 can
793 * actually be null since it serves as the source of the implicit GRF
794 * to MRF move.
795 *
796 * If we stop using that functionality, we'll have to revisit this.
797 */
798 return 2;
799 } else {
800 /* Send instructions are allowed to have null sources since they use
801 * the base_mrf field to specify which message register source.
802 */
803 return 0;
804 }
805 } else {
806 assert(desc->nsrc < 4);
807 return desc->nsrc;
808 }
809
810 switch (math_function) {
811 case ELK_MATH_FUNCTION_INV:
812 case ELK_MATH_FUNCTION_LOG:
813 case ELK_MATH_FUNCTION_EXP:
814 case ELK_MATH_FUNCTION_SQRT:
815 case ELK_MATH_FUNCTION_RSQ:
816 case ELK_MATH_FUNCTION_SIN:
817 case ELK_MATH_FUNCTION_COS:
818 case ELK_MATH_FUNCTION_SINCOS:
819 case GFX8_MATH_FUNCTION_INVM:
820 case GFX8_MATH_FUNCTION_RSQRTM:
821 return 1;
822 case ELK_MATH_FUNCTION_FDIV:
823 case ELK_MATH_FUNCTION_POW:
824 case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
825 case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT:
826 case ELK_MATH_FUNCTION_INT_DIV_REMAINDER:
827 return 2;
828 default:
829 unreachable("not reached");
830 }
831 }
832