1 /*
2 * Copyright © 2015-2019 Intel Corporation
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21 * IN THE SOFTWARE.
22 */
23
24 /** @file elk_eu_validate.c
25 *
26 * This file implements a pass that validates shader assembly.
27 *
28 * The restrictions implemented herein are intended to verify that instructions
29 * in shader assembly do not violate restrictions documented in the graphics
30 * programming reference manuals.
31 *
32 * The restrictions are difficult for humans to quickly verify due to their
33 * complexity and abundance.
34 *
35 * It is critical that this code is thoroughly unit tested because false
36 * results will lead developers astray, which is worse than having no validator
37 * at all. Functional changes to this file without corresponding unit tests (in
38 * test_eu_validate.cpp) will be rejected.
39 */
40
41 #include <stdlib.h>
42 #include "elk_eu.h"
43 #include "elk_disasm_info.h"
44
45 /* We're going to do lots of string concatenation, so this should help. */
46 struct string {
47 char *str;
48 size_t len;
49 };
50
51 static void
52 cat(struct string *dest, const struct string src)
53 {
54 dest->str = realloc(dest->str, dest->len + src.len + 1);
55 memcpy(dest->str + dest->len, src.str, src.len);
56 dest->str[dest->len + src.len] = '\0';
57 dest->len = dest->len + src.len;
58 }
59 #define CAT(dest, src) cat(&dest, (struct string){src, strlen(src)})
60
61 static bool
62 contains(const struct string haystack, const struct string needle)
63 {
64 return haystack.str && memmem(haystack.str, haystack.len,
65 needle.str, needle.len) != NULL;
66 }
67 #define CONTAINS(haystack, needle) \
68 contains(haystack, (struct string){needle, strlen(needle)})
69
70 #define error(str) "\tERROR: " str "\n"
71 #define ERROR_INDENT "\t "
72
73 #define ERROR(msg) ERROR_IF(true, msg)
74 #define ERROR_IF(cond, msg) \
75 do { \
76 if ((cond) && !CONTAINS(error_msg, error(msg))) { \
77 CAT(error_msg, error(msg)); \
78 } \
79 } while(0)
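/* Illustrative note (not from the PRMs): ERROR_IF() appends the formatted
 * message to the local `error_msg` accumulator only if that exact message is
 * not already present, so a given diagnostic is reported at most once per
 * instruction even when several checks trip the same condition.  A typical
 * use inside a validation function looks like:
 *
 *    ERROR_IF(exec_size > 8, "some restriction limited to SIMD8");
 *
 * which expands to a CONTAINS() check followed by CAT() on `error_msg`.
 */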
80
81 #define CHECK(func, args...) \
82 do { \
83 struct string __msg = func(isa, inst, ##args); \
84 if (__msg.str) { \
85 cat(&error_msg, __msg); \
86 free(__msg.str); \
87 } \
88 } while (0)
89
90 #define STRIDE(stride) (stride != 0 ? 1 << ((stride) - 1) : 0)
91 #define WIDTH(width) (1 << (width))
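/* Quick reference (derived from the macros above, not from the PRMs): the
 * hardware encodes strides and widths logarithmically, so for example
 * STRIDE(0) == 0, STRIDE(1) == 1, STRIDE(2) == 2, STRIDE(3) == 4, and
 * WIDTH(0) == 1, WIDTH(2) == 4, WIDTH(3) == 8.  The helpers below work with
 * these decoded element counts, not the raw encodings.
 */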
92
93 static bool
94 inst_is_send(const struct elk_isa_info *isa, const elk_inst *inst)
95 {
96 switch (elk_inst_opcode(isa, inst)) {
97 case ELK_OPCODE_SEND:
98 case ELK_OPCODE_SENDC:
99 return true;
100 default:
101 return false;
102 }
103 }
104
105 static unsigned
106 signed_type(unsigned type)
107 {
108 switch (type) {
109 case ELK_REGISTER_TYPE_UD: return ELK_REGISTER_TYPE_D;
110 case ELK_REGISTER_TYPE_UW: return ELK_REGISTER_TYPE_W;
111 case ELK_REGISTER_TYPE_UB: return ELK_REGISTER_TYPE_B;
112 case ELK_REGISTER_TYPE_UQ: return ELK_REGISTER_TYPE_Q;
113 default: return type;
114 }
115 }
116
117 static bool
118 inst_is_raw_move(const struct elk_isa_info *isa, const elk_inst *inst)
119 {
120 const struct intel_device_info *devinfo = isa->devinfo;
121
122 unsigned dst_type = signed_type(elk_inst_dst_type(devinfo, inst));
123 unsigned src_type = signed_type(elk_inst_src0_type(devinfo, inst));
124
125 if (elk_inst_src0_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
126 /* FIXME: not strictly true */
127 if (elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_VF ||
128 elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_UV ||
129 elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_V) {
130 return false;
131 }
132 } else if (elk_inst_src0_negate(devinfo, inst) ||
133 elk_inst_src0_abs(devinfo, inst)) {
134 return false;
135 }
136
137 return elk_inst_opcode(isa, inst) == ELK_OPCODE_MOV &&
138 elk_inst_saturate(devinfo, inst) == 0 &&
139 dst_type == src_type;
140 }
141
142 static bool
143 dst_is_null(const struct intel_device_info *devinfo, const elk_inst *inst)
144 {
145 return elk_inst_dst_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
146 elk_inst_dst_da_reg_nr(devinfo, inst) == ELK_ARF_NULL;
147 }
148
149 static bool
150 src0_is_null(const struct intel_device_info *devinfo, const elk_inst *inst)
151 {
152 return elk_inst_src0_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT &&
153 elk_inst_src0_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
154 elk_inst_src0_da_reg_nr(devinfo, inst) == ELK_ARF_NULL;
155 }
156
157 static bool
158 src1_is_null(const struct intel_device_info *devinfo, const elk_inst *inst)
159 {
160 return elk_inst_src1_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
161 elk_inst_src1_da_reg_nr(devinfo, inst) == ELK_ARF_NULL;
162 }
163
164 static bool
165 src0_is_acc(const struct intel_device_info *devinfo, const elk_inst *inst)
166 {
167 return elk_inst_src0_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
168 (elk_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == ELK_ARF_ACCUMULATOR;
169 }
170
171 static bool
172 src1_is_acc(const struct intel_device_info *devinfo, const elk_inst *inst)
173 {
174 return elk_inst_src1_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
175 (elk_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == ELK_ARF_ACCUMULATOR;
176 }
177
178 static bool
179 src0_has_scalar_region(const struct intel_device_info *devinfo,
180 const elk_inst *inst)
181 {
182 return elk_inst_src0_vstride(devinfo, inst) == ELK_VERTICAL_STRIDE_0 &&
183 elk_inst_src0_width(devinfo, inst) == ELK_WIDTH_1 &&
184 elk_inst_src0_hstride(devinfo, inst) == ELK_HORIZONTAL_STRIDE_0;
185 }
186
187 static bool
188 src1_has_scalar_region(const struct intel_device_info *devinfo,
189 const elk_inst *inst)
190 {
191 return elk_inst_src1_vstride(devinfo, inst) == ELK_VERTICAL_STRIDE_0 &&
192 elk_inst_src1_width(devinfo, inst) == ELK_WIDTH_1 &&
193 elk_inst_src1_hstride(devinfo, inst) == ELK_HORIZONTAL_STRIDE_0;
194 }
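/* Example (illustrative): a "scalar" region in the two helpers above is the
 * <0;1,0> region in the usual <VertStride;Width,HorzStride> notation, i.e.
 * every execution channel reads the same single element.  A source written
 * as g4.2<0,1,0>:F in assembly (a hypothetical operand) would therefore
 * satisfy src0_has_scalar_region().
 */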
195
196 static struct string
197 invalid_values(const struct elk_isa_info *isa, const elk_inst *inst)
198 {
199 const struct intel_device_info *devinfo = isa->devinfo;
200
201 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
202 struct string error_msg = { .str = NULL, .len = 0 };
203
204 switch ((enum elk_execution_size) elk_inst_exec_size(devinfo, inst)) {
205 case ELK_EXECUTE_1:
206 case ELK_EXECUTE_2:
207 case ELK_EXECUTE_4:
208 case ELK_EXECUTE_8:
209 case ELK_EXECUTE_16:
210 case ELK_EXECUTE_32:
211 break;
212 default:
213 ERROR("invalid execution size");
214 break;
215 }
216
217 if (error_msg.str)
218 return error_msg;
219
220 if (inst_is_send(isa, inst))
221 return error_msg;
222
223 if (num_sources == 3) {
224 /* Nothing to test:
225 * No 3-src instructions on Gfx4-5
226 * No reg file bits on Gfx6-10 (align16)
227 * No invalid encodings on Gfx10-12 (align1)
228 */
229 } else {
230 if (devinfo->ver > 6) {
231 ERROR_IF(elk_inst_dst_reg_file(devinfo, inst) == MRF ||
232 (num_sources > 0 &&
233 elk_inst_src0_reg_file(devinfo, inst) == MRF) ||
234 (num_sources > 1 &&
235 elk_inst_src1_reg_file(devinfo, inst) == MRF),
236 "invalid register file encoding");
237 }
238 }
239
240 if (error_msg.str)
241 return error_msg;
242
243 if (num_sources == 3) {
244 if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
245 ERROR("Align1 mode not allowed on Gen < 10");
246 } else {
247 ERROR_IF(elk_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||
248 elk_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,
249 "invalid register type encoding");
250 }
251 } else {
252 ERROR_IF(elk_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
253 (num_sources > 0 &&
254 elk_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||
255 (num_sources > 1 &&
256 elk_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),
257 "invalid register type encoding");
258 }
259
260 return error_msg;
261 }
262
263 static struct string
264 sources_not_null(const struct elk_isa_info *isa,
265 const elk_inst *inst)
266 {
267 const struct intel_device_info *devinfo = isa->devinfo;
268 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
269 struct string error_msg = { .str = NULL, .len = 0 };
270
271 /* Nothing to test. 3-src instructions can only have GRF sources, and
272 * there's no bit to control the file.
273 */
274 if (num_sources == 3)
275 return (struct string){};
276
277 if (num_sources >= 1)
278 ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
279
280 if (num_sources == 2)
281 ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
282
283 return error_msg;
284 }
285
286 static bool
287 inst_uses_src_acc(const struct elk_isa_info *isa,
288 const elk_inst *inst)
289 {
290 const struct intel_device_info *devinfo = isa->devinfo;
291
292 /* Check instructions that use implicit accumulator sources */
293 switch (elk_inst_opcode(isa, inst)) {
294 case ELK_OPCODE_MAC:
295 case ELK_OPCODE_MACH:
296 case ELK_OPCODE_SADA2:
297 return true;
298 default:
299 break;
300 }
301
302 /* FIXME: support 3-src instructions */
303 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
304 assert(num_sources < 3);
305
306 return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
307 }
308
309 static struct string
310 send_restrictions(const struct elk_isa_info *isa,
311 const elk_inst *inst)
312 {
313 const struct intel_device_info *devinfo = isa->devinfo;
314
315 struct string error_msg = { .str = NULL, .len = 0 };
316
317 if (inst_is_send(isa, inst)) {
318 ERROR_IF(elk_inst_src0_address_mode(devinfo, inst) != ELK_ADDRESS_DIRECT,
319 "send must use direct addressing");
320
321 if (devinfo->ver >= 7) {
322 ERROR_IF(elk_inst_send_src0_reg_file(devinfo, inst) != ELK_GENERAL_REGISTER_FILE,
323 "send from non-GRF");
324 ERROR_IF(elk_inst_eot(devinfo, inst) &&
325 elk_inst_src0_da_reg_nr(devinfo, inst) < 112,
326 "send with EOT must use g112-g127");
327 }
328
329 if (devinfo->ver >= 8) {
330 ERROR_IF(!dst_is_null(devinfo, inst) &&
331 (elk_inst_dst_da_reg_nr(devinfo, inst) +
332 elk_inst_rlen(devinfo, inst) > 127) &&
333 (elk_inst_src0_da_reg_nr(devinfo, inst) +
334 elk_inst_mlen(devinfo, inst) >
335 elk_inst_dst_da_reg_nr(devinfo, inst)),
336 "r127 must not be used for return address when there is "
337 "a src and dest overlap");
338 }
339 }
340
341 return error_msg;
342 }
343
344 static bool
345 is_unsupported_inst(const struct elk_isa_info *isa,
346 const elk_inst *inst)
347 {
348 return elk_inst_opcode(isa, inst) == ELK_OPCODE_ILLEGAL;
349 }
350
351 /**
352 * Returns whether a combination of two types would qualify as mixed float
353 * operation mode
354 */
355 static inline bool
356 types_are_mixed_float(enum elk_reg_type t0, enum elk_reg_type t1)
357 {
358 return (t0 == ELK_REGISTER_TYPE_F && t1 == ELK_REGISTER_TYPE_HF) ||
359 (t1 == ELK_REGISTER_TYPE_F && t0 == ELK_REGISTER_TYPE_HF);
360 }
361
362 static enum elk_reg_type
363 execution_type_for_type(enum elk_reg_type type)
364 {
365 switch (type) {
366 case ELK_REGISTER_TYPE_NF:
367 case ELK_REGISTER_TYPE_DF:
368 case ELK_REGISTER_TYPE_F:
369 case ELK_REGISTER_TYPE_HF:
370 return type;
371
372 case ELK_REGISTER_TYPE_VF:
373 return ELK_REGISTER_TYPE_F;
374
375 case ELK_REGISTER_TYPE_Q:
376 case ELK_REGISTER_TYPE_UQ:
377 return ELK_REGISTER_TYPE_Q;
378
379 case ELK_REGISTER_TYPE_D:
380 case ELK_REGISTER_TYPE_UD:
381 return ELK_REGISTER_TYPE_D;
382
383 case ELK_REGISTER_TYPE_W:
384 case ELK_REGISTER_TYPE_UW:
385 case ELK_REGISTER_TYPE_B:
386 case ELK_REGISTER_TYPE_UB:
387 case ELK_REGISTER_TYPE_V:
388 case ELK_REGISTER_TYPE_UV:
389 return ELK_REGISTER_TYPE_W;
390 }
391 unreachable("not reached");
392 }
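/* For instance (following the switch above): UD maps to D, UW/UB/B/V/UV all
 * map to W, VF maps to F, and the floating-point types (NF, DF, F, HF) map
 * to themselves.
 */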
393
394 /**
395 * Returns the execution type of an instruction \p inst
396 */
397 static enum elk_reg_type
398 execution_type(const struct elk_isa_info *isa, const elk_inst *inst)
399 {
400 const struct intel_device_info *devinfo = isa->devinfo;
401
402 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
403 enum elk_reg_type src0_exec_type, src1_exec_type;
404
405 /* Execution data type is independent of destination data type, except in
406 * mixed F/HF instructions.
407 */
408 enum elk_reg_type dst_exec_type = elk_inst_dst_type(devinfo, inst);
409
410 src0_exec_type = execution_type_for_type(elk_inst_src0_type(devinfo, inst));
411 if (num_sources == 1) {
412 if (src0_exec_type == ELK_REGISTER_TYPE_HF)
413 return dst_exec_type;
414 return src0_exec_type;
415 }
416
417 src1_exec_type = execution_type_for_type(elk_inst_src1_type(devinfo, inst));
418 if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
419 types_are_mixed_float(src0_exec_type, dst_exec_type) ||
420 types_are_mixed_float(src1_exec_type, dst_exec_type)) {
421 return ELK_REGISTER_TYPE_F;
422 }
423
424 if (src0_exec_type == src1_exec_type)
425 return src0_exec_type;
426
427 if (src0_exec_type == ELK_REGISTER_TYPE_NF ||
428 src1_exec_type == ELK_REGISTER_TYPE_NF)
429 return ELK_REGISTER_TYPE_NF;
430
431    /* An operand pair that mixes float with another type executes as float on
432     * Gen < 6 (such mixing is not allowed on later platforms).
433 */
434 if (devinfo->ver < 6 &&
435 (src0_exec_type == ELK_REGISTER_TYPE_F ||
436 src1_exec_type == ELK_REGISTER_TYPE_F))
437 return ELK_REGISTER_TYPE_F;
438
439 if (src0_exec_type == ELK_REGISTER_TYPE_Q ||
440 src1_exec_type == ELK_REGISTER_TYPE_Q)
441 return ELK_REGISTER_TYPE_Q;
442
443 if (src0_exec_type == ELK_REGISTER_TYPE_D ||
444 src1_exec_type == ELK_REGISTER_TYPE_D)
445 return ELK_REGISTER_TYPE_D;
446
447 if (src0_exec_type == ELK_REGISTER_TYPE_W ||
448 src1_exec_type == ELK_REGISTER_TYPE_W)
449 return ELK_REGISTER_TYPE_W;
450
451 if (src0_exec_type == ELK_REGISTER_TYPE_DF ||
452 src1_exec_type == ELK_REGISTER_TYPE_DF)
453 return ELK_REGISTER_TYPE_DF;
454
455 unreachable("not reached");
456 }
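/* Worked example (illustrative): for a two-source Align1 instruction with a
 * D src0 and a UW src1, the per-source execution types above are D and W;
 * they are not equal, neither is NF, and no float/half-float mixing is
 * involved, so the D case wins and the instruction executes as D.
 */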
457
458 /**
459 * Returns whether a region is packed
460 *
461 * A region is packed if its elements are adjacent in memory, with no
462 * intervening space, no overlap, and no replicated values.
463 */
464 static bool
465 is_packed(unsigned vstride, unsigned width, unsigned hstride)
466 {
467 if (vstride == width) {
468 if (vstride == 1) {
469 return hstride == 0;
470 } else {
471 return hstride == 1;
472 }
473 }
474
475 return false;
476 }
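/* Examples (decoded values, i.e. after STRIDE()/WIDTH()): the region <4;4,1>
 * is packed (vstride == width and hstride == 1), and so is the single-element
 * region <1;1,0>.  The strided region <8;4,2> is not packed because its
 * elements are two element-sizes apart.
 */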
477
478 /**
479 * Returns whether a region is linear
480 *
481 * A region is linear if its elements do not overlap and are not replicated.
482 * Unlike a packed region, intervening space (i.e. strided values) is allowed.
483 */
484 static bool
485 is_linear(unsigned vstride, unsigned width, unsigned hstride)
486 {
487 return vstride == width * hstride ||
488 (hstride == 0 && width == 1);
489 }
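/* Examples: <8;4,2> is linear (vstride == width * hstride, so elements are
 * strided but neither overlapping nor replicated) although it is not packed,
 * while <4;4,2> is not linear because consecutive rows overlap.
 */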
490
491 /**
492 * Returns whether an instruction is an explicit or implicit conversion
493 * to/from half-float.
494 */
495 static bool
496 is_half_float_conversion(const struct elk_isa_info *isa,
497 const elk_inst *inst)
498 {
499 const struct intel_device_info *devinfo = isa->devinfo;
500
501 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
502
503 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
504 enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
505
506 if (dst_type != src0_type &&
507 (dst_type == ELK_REGISTER_TYPE_HF || src0_type == ELK_REGISTER_TYPE_HF)) {
508 return true;
509 } else if (num_sources > 1) {
510 enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
511 return dst_type != src1_type &&
512 (dst_type == ELK_REGISTER_TYPE_HF ||
513 src1_type == ELK_REGISTER_TYPE_HF);
514 }
515
516 return false;
517 }
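/* For example (illustrative): a MOV from an F source to an HF destination is
 * an explicit half-float conversion, and an ADD with a D source and an HF
 * destination counts as an implicit one, since the types differ and one of
 * them is HF.
 */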
518
519 /*
520 * Returns whether an instruction is using mixed float operation mode
521 */
522 static bool
523 is_mixed_float(const struct elk_isa_info *isa, const elk_inst *inst)
524 {
525 const struct intel_device_info *devinfo = isa->devinfo;
526
527 if (devinfo->ver < 8)
528 return false;
529
530 if (inst_is_send(isa, inst))
531 return false;
532
533 unsigned opcode = elk_inst_opcode(isa, inst);
534 const struct elk_opcode_desc *desc = elk_opcode_desc(isa, opcode);
535 if (desc->ndst == 0)
536 return false;
537
538 /* FIXME: support 3-src instructions */
539 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
540 assert(num_sources < 3);
541
542 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
543 enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
544
545 if (num_sources == 1)
546 return types_are_mixed_float(src0_type, dst_type);
547
548 enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
549
550 return types_are_mixed_float(src0_type, src1_type) ||
551 types_are_mixed_float(src0_type, dst_type) ||
552 types_are_mixed_float(src1_type, dst_type);
553 }
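/* For example (illustrative; note the function returns false on Gfx < 8): an
 * ADD with one F source, one HF source and an F destination is mixed float,
 * whereas an ADD whose sources and destination are all F is not.
 */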
554
555 /**
556 * Returns whether an instruction is an explicit or implicit conversion
557 * to/from byte.
558 */
559 static bool
560 is_byte_conversion(const struct elk_isa_info *isa,
561 const elk_inst *inst)
562 {
563 const struct intel_device_info *devinfo = isa->devinfo;
564
565 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
566
567 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
568 enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
569
570 if (dst_type != src0_type &&
571 (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
572 return true;
573 } else if (num_sources > 1) {
574 enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
575 return dst_type != src1_type &&
576 (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
577 }
578
579 return false;
580 }
581
582 /**
583 * Checks restrictions listed in "General Restrictions Based on Operand Types"
584 * in the "Register Region Restrictions" section.
585 */
586 static struct string
587 general_restrictions_based_on_operand_types(const struct elk_isa_info *isa,
588 const elk_inst *inst)
589 {
590 const struct intel_device_info *devinfo = isa->devinfo;
591
592 const struct elk_opcode_desc *desc =
593 elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
594 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
595 unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
596 struct string error_msg = { .str = NULL, .len = 0 };
597
598 if (inst_is_send(isa, inst))
599 return error_msg;
600
601 enum elk_reg_type dst_type;
602
603 if (num_sources == 3) {
604 dst_type = elk_inst_3src_a16_dst_type(devinfo, inst);
605 } else {
606 dst_type = elk_inst_dst_type(devinfo, inst);
607 }
608
609 ERROR_IF(dst_type == ELK_REGISTER_TYPE_DF &&
610 !devinfo->has_64bit_float,
611 "64-bit float destination, but platform does not support it");
612
613 ERROR_IF((dst_type == ELK_REGISTER_TYPE_Q ||
614 dst_type == ELK_REGISTER_TYPE_UQ) &&
615 !devinfo->has_64bit_int,
616 "64-bit int destination, but platform does not support it");
617
618 for (unsigned s = 0; s < num_sources; s++) {
619 enum elk_reg_type src_type;
620 if (num_sources == 3) {
621 src_type = elk_inst_3src_a16_src_type(devinfo, inst);
622 } else {
623 switch (s) {
624 case 0: src_type = elk_inst_src0_type(devinfo, inst); break;
625 case 1: src_type = elk_inst_src1_type(devinfo, inst); break;
626 default: unreachable("invalid src");
627 }
628 }
629
630 ERROR_IF(src_type == ELK_REGISTER_TYPE_DF &&
631 !devinfo->has_64bit_float,
632 "64-bit float source, but platform does not support it");
633
634 ERROR_IF((src_type == ELK_REGISTER_TYPE_Q ||
635 src_type == ELK_REGISTER_TYPE_UQ) &&
636 !devinfo->has_64bit_int,
637 "64-bit int source, but platform does not support it");
638 }
639
640 if (num_sources == 3)
641 return error_msg;
642
643 if (exec_size == 1)
644 return error_msg;
645
646 if (desc->ndst == 0)
647 return error_msg;
648
649 /* The PRMs say:
650 *
651 * Where n is the largest element size in bytes for any source or
652 * destination operand type, ExecSize * n must be <= 64.
653 *
654 * But we do not attempt to enforce it, because it is implied by other
655 * rules:
656 *
657 * - that the destination stride must match the execution data type
658 * - sources may not span more than two adjacent GRF registers
659 * - destination may not span more than two adjacent GRF registers
660 *
661 * In fact, checking it would weaken testing of the other rules.
662 */
663
664 unsigned dst_stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
665 bool dst_type_is_byte =
666 elk_inst_dst_type(devinfo, inst) == ELK_REGISTER_TYPE_B ||
667 elk_inst_dst_type(devinfo, inst) == ELK_REGISTER_TYPE_UB;
668
669 if (dst_type_is_byte) {
670 if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
671 if (!inst_is_raw_move(isa, inst))
672 ERROR("Only raw MOV supports a packed-byte destination");
673 return error_msg;
674 }
675 }
676
677 unsigned exec_type = execution_type(isa, inst);
678 unsigned exec_type_size = elk_reg_type_to_size(exec_type);
679 unsigned dst_type_size = elk_reg_type_to_size(dst_type);
680
681 /* On IVB/BYT, region parameters and execution size for DF are in terms of
682 * 32-bit elements, so they are doubled. For evaluating the validity of an
683 * instruction, we halve them.
684 */
685 if (devinfo->verx10 == 70 &&
686 exec_type_size == 8 && dst_type_size == 4)
687 dst_type_size = 8;
688
689 if (is_byte_conversion(isa, inst)) {
690 /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
691 *
692 * "There is no direct conversion from B/UB to DF or DF to B/UB.
693 * There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
694 *
695 * Even if these restrictions are listed for the MOV instruction, we
696 * validate this more generally, since there is the possibility
697 * of implicit conversions from other instructions.
698 */
699 enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
700 enum elk_reg_type src1_type = num_sources > 1 ?
701 elk_inst_src1_type(devinfo, inst) : 0;
702
703 ERROR_IF(type_sz(dst_type) == 1 &&
704 (type_sz(src0_type) == 8 ||
705 (num_sources > 1 && type_sz(src1_type) == 8)),
706 "There are no direct conversions between 64-bit types and B/UB");
707
708 ERROR_IF(type_sz(dst_type) == 8 &&
709 (type_sz(src0_type) == 1 ||
710 (num_sources > 1 && type_sz(src1_type) == 1)),
711 "There are no direct conversions between 64-bit types and B/UB");
712 }
713
714 if (is_half_float_conversion(isa, inst)) {
715 /**
716       * Validate the following restriction from the BDW+ PRM, Volume 2a,
717       * Command Reference, Instructions - MOV:
718 *
719 * "There is no direct conversion from HF to DF or DF to HF.
720 * There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
721 *
722 * Even if these restrictions are listed for the MOV instruction, we
723 * validate this more generally, since there is the possibility
724       * of implicit conversions from other instructions, such as the implicit
725 * conversion from integer to HF with the ADD instruction in SKL+.
726 */
727 enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
728 enum elk_reg_type src1_type = num_sources > 1 ?
729 elk_inst_src1_type(devinfo, inst) : 0;
730 ERROR_IF(dst_type == ELK_REGISTER_TYPE_HF &&
731 (type_sz(src0_type) == 8 ||
732 (num_sources > 1 && type_sz(src1_type) == 8)),
733 "There are no direct conversions between 64-bit types and HF");
734
735 ERROR_IF(type_sz(dst_type) == 8 &&
736 (src0_type == ELK_REGISTER_TYPE_HF ||
737 (num_sources > 1 && src1_type == ELK_REGISTER_TYPE_HF)),
738 "There are no direct conversions between 64-bit types and HF");
739
740 /* From the BDW+ PRM:
741 *
742 * "Conversion between Integer and HF (Half Float) must be
743 * DWord-aligned and strided by a DWord on the destination."
744 *
745       * Also, the above restriction seems to be expanded on CHV and SKL+ by:
746 *
747 * "There is a relaxed alignment rule for word destinations. When
748 * the destination type is word (UW, W, HF), destination data types
749 * can be aligned to either the lowest word or the second lowest
750 * word of the execution channel. This means the destination data
751 * words can be either all in the even word locations or all in the
752 * odd word locations."
753 *
754 * We do not implement the second rule as is though, since empirical
755 * testing shows inconsistencies:
756 * - It suggests that packed 16-bit is not allowed, which is not true.
757 * - It suggests that conversions from Q/DF to W (which need to be
758 * 64-bit aligned on the destination) are not possible, which is
759 * not true.
760 *
761 * So from this rule we only validate the implication that conversions
762 * from F to HF need to be DWord strided (except in Align1 mixed
763 * float mode where packed fp16 destination is allowed so long as the
764 * destination is oword-aligned).
765 *
766 * Finally, we only validate this for Align1 because Align16 always
767 * requires packed destinations, so these restrictions can't possibly
768 * apply to Align16 mode.
769 */
770 if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
771 if ((dst_type == ELK_REGISTER_TYPE_HF &&
772 (elk_reg_type_is_integer(src0_type) ||
773 (num_sources > 1 && elk_reg_type_is_integer(src1_type)))) ||
774 (elk_reg_type_is_integer(dst_type) &&
775 (src0_type == ELK_REGISTER_TYPE_HF ||
776 (num_sources > 1 && src1_type == ELK_REGISTER_TYPE_HF)))) {
777 ERROR_IF(dst_stride * dst_type_size != 4,
778 "Conversions between integer and half-float must be "
779 "strided by a DWord on the destination");
780
781 unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
782 ERROR_IF(subreg % 4 != 0,
783 "Conversions between integer and half-float must be "
784 "aligned to a DWord on the destination");
785 } else if (devinfo->platform == INTEL_PLATFORM_CHV &&
786 dst_type == ELK_REGISTER_TYPE_HF) {
787 unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
788 ERROR_IF(dst_stride != 2 &&
789 !(is_mixed_float(isa, inst) &&
790 dst_stride == 1 && subreg % 16 == 0),
791 "Conversions to HF must have either all words in even "
792 "word locations or all words in odd word locations or "
793 "be mixed-float with Oword-aligned packed destination");
794 }
795 }
796 }
797
798 /* There are special regioning rules for mixed-float mode in CHV and SKL that
799 * override the general rule for the ratio of sizes of the destination type
800 * and the execution type. We will add validation for those in a later patch.
801 */
802 bool validate_dst_size_and_exec_size_ratio =
803 !is_mixed_float(isa, inst) ||
804 !(devinfo->platform == INTEL_PLATFORM_CHV);
805
806 if (validate_dst_size_and_exec_size_ratio &&
807 exec_type_size > dst_type_size) {
808 if (!(dst_type_is_byte && inst_is_raw_move(isa, inst))) {
809 ERROR_IF(dst_stride * dst_type_size != exec_type_size,
810 "Destination stride must be equal to the ratio of the sizes "
811 "of the execution data type to the destination type");
812 }
813
814 unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
815
816 if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1 &&
817 elk_inst_dst_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
818 /* The i965 PRM says:
819 *
820 * Implementation Restriction: The relaxed alignment rule for byte
821 * destination (#10.5) is not supported.
822 */
823 if (devinfo->verx10 >= 45 && dst_type_is_byte) {
824 ERROR_IF(subreg % exec_type_size != 0 &&
825 subreg % exec_type_size != 1,
826 "Destination subreg must be aligned to the size of the "
827 "execution data type (or to the next lowest byte for byte "
828 "destinations)");
829 } else {
830 ERROR_IF(subreg % exec_type_size != 0,
831 "Destination subreg must be aligned to the size of the "
832 "execution data type");
833 }
834 }
835 }
836
837 return error_msg;
838 }
839
840 /**
841 * Checks restrictions listed in "General Restrictions on Regioning Parameters"
842 * in the "Register Region Restrictions" section.
843 */
844 static struct string
845 general_restrictions_on_region_parameters(const struct elk_isa_info *isa,
846 const elk_inst *inst)
847 {
848 const struct intel_device_info *devinfo = isa->devinfo;
849
850 const struct elk_opcode_desc *desc =
851 elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
852 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
853 unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
854 struct string error_msg = { .str = NULL, .len = 0 };
855
856 if (num_sources == 3)
857 return (struct string){};
858
859 if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16) {
860 if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
861 ERROR_IF(elk_inst_dst_hstride(devinfo, inst) != ELK_HORIZONTAL_STRIDE_1,
862 "Destination Horizontal Stride must be 1");
863
864 if (num_sources >= 1) {
865 if (devinfo->verx10 >= 75) {
866 ERROR_IF(elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
867 elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
868 elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_2 &&
869 elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
870 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
871 } else {
872 ERROR_IF(elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
873 elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
874 elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
875 "In Align16 mode, only VertStride of 0 or 4 is allowed");
876 }
877 }
878
879 if (num_sources == 2) {
880 if (devinfo->verx10 >= 75) {
881 ERROR_IF(elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
882 elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
883 elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_2 &&
884 elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
885 "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
886 } else {
887 ERROR_IF(elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
888 elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
889 elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
890 "In Align16 mode, only VertStride of 0 or 4 is allowed");
891 }
892 }
893
894 return error_msg;
895 }
896
897 for (unsigned i = 0; i < num_sources; i++) {
898 unsigned vstride, width, hstride, element_size, subreg;
899 enum elk_reg_type type;
900
901 #define DO_SRC(n) \
902 if (elk_inst_src ## n ## _reg_file(devinfo, inst) == \
903 ELK_IMMEDIATE_VALUE) \
904 continue; \
905 \
906 vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst)); \
907 width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst)); \
908 hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst)); \
909 type = elk_inst_src ## n ## _type(devinfo, inst); \
910 element_size = elk_reg_type_to_size(type); \
911 subreg = elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst)
912
913 if (i == 0) {
914 DO_SRC(0);
915 } else {
916 DO_SRC(1);
917 }
918 #undef DO_SRC
919
920 /* On IVB/BYT, region parameters and execution size for DF are in terms of
921 * 32-bit elements, so they are doubled. For evaluating the validity of an
922 * instruction, we halve them.
923 */
924 if (devinfo->verx10 == 70 &&
925 element_size == 8)
926 element_size = 4;
927
928 /* ExecSize must be greater than or equal to Width. */
929 ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
930 "to Width");
931
932 /* If ExecSize = Width and HorzStride ≠ 0,
933 * VertStride must be set to Width * HorzStride.
934 */
935 if (exec_size == width && hstride != 0) {
936 ERROR_IF(vstride != width * hstride,
937 "If ExecSize = Width and HorzStride ≠ 0, "
938 "VertStride must be set to Width * HorzStride");
939 }
940
941 /* If Width = 1, HorzStride must be 0 regardless of the values of
942 * ExecSize and VertStride.
943 */
944 if (width == 1) {
945 ERROR_IF(hstride != 0,
946 "If Width = 1, HorzStride must be 0 regardless "
947 "of the values of ExecSize and VertStride");
948 }
949
950 /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
951 if (exec_size == 1 && width == 1) {
952 ERROR_IF(vstride != 0 || hstride != 0,
953 "If ExecSize = Width = 1, both VertStride "
954 "and HorzStride must be 0");
955 }
956
957 /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
958 * value of ExecSize.
959 */
960 if (vstride == 0 && hstride == 0) {
961 ERROR_IF(width != 1,
962 "If VertStride = HorzStride = 0, Width must be "
963 "1 regardless of the value of ExecSize");
964 }
965
966 /* VertStride must be used to cross GRF register boundaries. This rule
967 * implies that elements within a 'Width' cannot cross GRF boundaries.
968 */
969 const uint64_t mask = (1ULL << element_size) - 1;
970 unsigned rowbase = subreg;
971
972 for (int y = 0; y < exec_size / width; y++) {
973 uint64_t access_mask = 0;
974 unsigned offset = rowbase;
975
976 for (int x = 0; x < width; x++) {
977 access_mask |= mask << (offset % 64);
978 offset += hstride * element_size;
979 }
980
981 rowbase += vstride * element_size;
982
983 if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
984 ERROR("VertStride must be used to cross GRF register boundaries");
985 break;
986 }
987 }
988 }
989
990 /* Dst.HorzStride must not be 0. */
991 if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
992 ERROR_IF(elk_inst_dst_hstride(devinfo, inst) == ELK_HORIZONTAL_STRIDE_0,
993 "Destination Horizontal Stride must not be 0");
994 }
995
996 return error_msg;
997 }
998
999 static struct string
1000 special_restrictions_for_mixed_float_mode(const struct elk_isa_info *isa,
1001 const elk_inst *inst)
1002 {
1003 const struct intel_device_info *devinfo = isa->devinfo;
1004
1005 struct string error_msg = { .str = NULL, .len = 0 };
1006
1007 const unsigned opcode = elk_inst_opcode(isa, inst);
1008 const unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1009 if (num_sources >= 3)
1010 return error_msg;
1011
1012 if (!is_mixed_float(isa, inst))
1013 return error_msg;
1014
1015 unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
1016 bool is_align16 = elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16;
1017
1018 enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
1019 enum elk_reg_type src1_type = num_sources > 1 ?
1020 elk_inst_src1_type(devinfo, inst) : 0;
1021 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1022
1023 unsigned dst_stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1024 bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1025
1026 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1027 * Float Operations:
1028 *
1029 * "Indirect addressing on source is not supported when source and
1030 * destination data types are mixed float."
1031 */
1032 ERROR_IF(elk_inst_src0_address_mode(devinfo, inst) != ELK_ADDRESS_DIRECT ||
1033 (num_sources > 1 &&
1034 elk_inst_src1_address_mode(devinfo, inst) != ELK_ADDRESS_DIRECT),
1035 "Indirect addressing on source is not supported when source and "
1036 "destination data types are mixed float");
1037
1038 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1039 * Float Operations:
1040 *
1041 * "No SIMD16 in mixed mode when destination is f32. Instruction
1042 * execution size must be no more than 8."
1043 */
1044 ERROR_IF(exec_size > 8 && dst_type == ELK_REGISTER_TYPE_F,
1045 "Mixed float mode with 32-bit float destination is limited "
1046 "to SIMD8");
1047
1048 if (is_align16) {
1049 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1050 * Float Operations:
1051 *
1052 * "In Align16 mode, when half float and float data types are mixed
1053 * between source operands OR between source and destination operands,
1054 * the register content are assumed to be packed."
1055 *
1056 * Since Align16 doesn't have a concept of horizontal stride (or width),
1057 * it means that vertical stride must always be 4, since 0 and 2 would
1058 * lead to replicated data, and any other value is disallowed in Align16.
1059 */
1060 ERROR_IF(elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
1061 "Align16 mixed float mode assumes packed data (vstride must be 4");
1062
1063 ERROR_IF(num_sources >= 2 &&
1064 elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
1065 "Align16 mixed float mode assumes packed data (vstride must be 4");
1066
1067 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1068 * Float Operations:
1069 *
1070 * "For Align16 mixed mode, both input and output packed f16 data
1071 * must be oword aligned, no oword crossing in packed f16."
1072 *
1073 * The previous rule requires that Align16 operands are always packed,
1074 * and since there is only one bit for Align16 subnr, which represents
1075 * offsets 0B and 16B, this rule is always enforced and we don't need to
1076 * validate it.
1077 */
1078
1079 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1080 * Float Operations:
1081 *
1082 * "No SIMD16 in mixed mode when destination is packed f16 for both
1083 * Align1 and Align16."
1084 *
1085 * And:
1086 *
1087 * "In Align16 mode, when half float and float data types are mixed
1088 * between source operands OR between source and destination operands,
1089 * the register content are assumed to be packed."
1090 *
1091 * Which implies that SIMD16 is not available in Align16. This is further
1092 * confirmed by:
1093 *
1094 * "For Align16 mixed mode, both input and output packed f16 data
1095 * must be oword aligned, no oword crossing in packed f16"
1096 *
1097 * Since oword-aligned packed f16 data would cross oword boundaries when
1098 * the execution size is larger than 8.
1099 */
1100 ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1101
1102 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1103 * Float Operations:
1104 *
1105 * "No accumulator read access for Align16 mixed float."
1106 */
1107 ERROR_IF(inst_uses_src_acc(isa, inst),
1108 "No accumulator read access for Align16 mixed float");
1109 } else {
1110 assert(!is_align16);
1111
1112 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1113 * Float Operations:
1114 *
1115 * "No SIMD16 in mixed mode when destination is packed f16 for both
1116 * Align1 and Align16."
1117 */
1118 ERROR_IF(exec_size > 8 && dst_is_packed &&
1119 dst_type == ELK_REGISTER_TYPE_HF,
1120 "Align1 mixed float mode is limited to SIMD8 when destination "
1121 "is packed half-float");
1122
1123 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1124 * Float Operations:
1125 *
1126 * "Math operations for mixed mode:
1127 * - In Align1, f16 inputs need to be strided"
1128 */
1129 if (opcode == ELK_OPCODE_MATH) {
1130 if (src0_type == ELK_REGISTER_TYPE_HF) {
1131 ERROR_IF(STRIDE(elk_inst_src0_hstride(devinfo, inst)) <= 1,
1132 "Align1 mixed mode math needs strided half-float inputs");
1133 }
1134
1135 if (num_sources >= 2 && src1_type == ELK_REGISTER_TYPE_HF) {
1136 ERROR_IF(STRIDE(elk_inst_src1_hstride(devinfo, inst)) <= 1,
1137 "Align1 mixed mode math needs strided half-float inputs");
1138 }
1139 }
1140
1141 if (dst_type == ELK_REGISTER_TYPE_HF && dst_stride == 1) {
1142 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1143 * Float Operations:
1144 *
1145 * "In Align1, destination stride can be smaller than execution
1146 * type. When destination is stride of 1, 16 bit packed data is
1147 * updated on the destination. However, output packed f16 data
1148 * must be oword aligned, no oword crossing in packed f16."
1149 *
1150 * The requirement of not crossing oword boundaries for 16-bit oword
1151 * aligned data means that execution size is limited to 8.
1152 */
1153 unsigned subreg;
1154 if (elk_inst_dst_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT)
1155 subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
1156 else
1157 subreg = elk_inst_dst_ia_subreg_nr(devinfo, inst);
1158 ERROR_IF(subreg % 16 != 0,
1159 "Align1 mixed mode packed half-float output must be "
1160 "oword aligned");
1161 ERROR_IF(exec_size > 8,
1162 "Align1 mixed mode packed half-float output must not "
1163 "cross oword boundaries (max exec size is 8)");
1164
1165 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1166 * Float Operations:
1167 *
1168 * "When source is float or half float from accumulator register and
1169 * destination is half float with a stride of 1, the source must
1170          * be register aligned. i.e., source must have offset zero."
1171 *
1172 * Align16 mixed float mode doesn't allow accumulator access on sources,
1173 * so we only need to check this for Align1.
1174 */
1175 if (src0_is_acc(devinfo, inst) &&
1176 (src0_type == ELK_REGISTER_TYPE_F ||
1177 src0_type == ELK_REGISTER_TYPE_HF)) {
1178 ERROR_IF(elk_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1179 "Mixed float mode requires register-aligned accumulator "
1180 "source reads when destination is packed half-float");
1181
1182 }
1183
1184 if (num_sources > 1 &&
1185 src1_is_acc(devinfo, inst) &&
1186 (src1_type == ELK_REGISTER_TYPE_F ||
1187 src1_type == ELK_REGISTER_TYPE_HF)) {
1188 ERROR_IF(elk_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1189 "Mixed float mode requires register-aligned accumulator "
1190 "source reads when destination is packed half-float");
1191 }
1192 }
1193
1194 /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1195 * Float Operations:
1196 *
1197 * "No swizzle is allowed when an accumulator is used as an implicit
1198 * source or an explicit source in an instruction. i.e. when
1199 * destination is half float with an implicit accumulator source,
1200 * destination stride needs to be 2."
1201 *
1202 * FIXME: it is not quite clear what the first sentence actually means
1203 * or its link to the implication described after it, so we only
1204 * validate the explicit implication, which is clearly described.
1205 */
1206 if (dst_type == ELK_REGISTER_TYPE_HF &&
1207 inst_uses_src_acc(isa, inst)) {
1208 ERROR_IF(dst_stride != 2,
1209 "Mixed float mode with implicit/explicit accumulator "
1210 "source and half-float destination requires a stride "
1211 "of 2 on the destination");
1212 }
1213 }
1214
1215 return error_msg;
1216 }
1217
1218 /**
1219 * Creates an \p access_mask for an \p exec_size, \p element_size, and a region
1220 *
1221 * An \p access_mask is a 32-element array of uint64_t, where each uint64_t is
1222 * a bitmask of bytes accessed by the region.
1223 *
1224 * For instance the access mask of the source gX.1<4,2,2>F in an exec_size = 4
1225 * instruction would be
1226 *
1227 * access_mask[0] = 0x00000000000000F0
1228 * access_mask[1] = 0x000000000000F000
1229 * access_mask[2] = 0x0000000000F00000
1230 * access_mask[3] = 0x00000000F0000000
1231 * access_mask[4-31] = 0
1232 *
1233 * because the first execution channel accesses bytes 7-4 and the second
1234 * execution channel accesses bytes 15-12, etc.
1235 */
1236 static void
1237 align1_access_mask(uint64_t access_mask[static 32],
1238 unsigned exec_size, unsigned element_size, unsigned subreg,
1239 unsigned vstride, unsigned width, unsigned hstride)
1240 {
1241 const uint64_t mask = (1ULL << element_size) - 1;
1242 unsigned rowbase = subreg;
1243 unsigned element = 0;
1244
1245 for (int y = 0; y < exec_size / width; y++) {
1246 unsigned offset = rowbase;
1247
1248 for (int x = 0; x < width; x++) {
1249 access_mask[element++] = mask << (offset % 64);
1250 offset += hstride * element_size;
1251 }
1252
1253 rowbase += vstride * element_size;
1254 }
1255
1256 assert(element == 0 || element == exec_size);
1257 }
1258
1259 /**
1260 * Returns the number of registers accessed according to the \p access_mask
1261 */
1262 static int
1263 registers_read(const uint64_t access_mask[static 32])
1264 {
1265 int regs_read = 0;
1266
1267 for (unsigned i = 0; i < 32; i++) {
1268 if (access_mask[i] > 0xFFFFFFFF) {
1269 return 2;
1270 } else if (access_mask[i]) {
1271 regs_read = 1;
1272 }
1273 }
1274
1275 return regs_read;
1276 }
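/* For example: with 32-byte GRFs, a per-channel mask of 0x00000000000000F0
 * only touches the first register of the two-register window (bytes 4-7),
 * whereas a mask of 0x000000F000000000 has bits above bit 31 set and counts
 * as touching the second register, making registers_read() return 2.
 */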
1277
1278 /**
1279 * Checks restrictions listed in "Region Alignment Rules" in the "Register
1280 * Region Restrictions" section.
1281 */
1282 static struct string
1283 region_alignment_rules(const struct elk_isa_info *isa,
1284 const elk_inst *inst)
1285 {
1286 const struct intel_device_info *devinfo = isa->devinfo;
1287 const struct elk_opcode_desc *desc =
1288 elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
1289 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1290 unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
1291 uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
1292 struct string error_msg = { .str = NULL, .len = 0 };
1293
1294 if (num_sources == 3)
1295 return (struct string){};
1296
1297 if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16)
1298 return (struct string){};
1299
1300 if (inst_is_send(isa, inst))
1301 return (struct string){};
1302
1303 memset(dst_access_mask, 0, sizeof(dst_access_mask));
1304 memset(src0_access_mask, 0, sizeof(src0_access_mask));
1305 memset(src1_access_mask, 0, sizeof(src1_access_mask));
1306
1307 for (unsigned i = 0; i < num_sources; i++) {
1308 unsigned vstride, width, hstride, element_size, subreg;
1309 enum elk_reg_type type;
1310
1311 /* In Direct Addressing mode, a source cannot span more than 2 adjacent
1312 * GRF registers.
1313 */
1314
1315 #define DO_SRC(n) \
1316 if (elk_inst_src ## n ## _address_mode(devinfo, inst) != \
1317 ELK_ADDRESS_DIRECT) \
1318 continue; \
1319 \
1320 if (elk_inst_src ## n ## _reg_file(devinfo, inst) == \
1321 ELK_IMMEDIATE_VALUE) \
1322 continue; \
1323 \
1324 vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst)); \
1325 width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst)); \
1326 hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst)); \
1327 type = elk_inst_src ## n ## _type(devinfo, inst); \
1328 element_size = elk_reg_type_to_size(type); \
1329 subreg = elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
1330 align1_access_mask(src ## n ## _access_mask, \
1331 exec_size, element_size, subreg, \
1332 vstride, width, hstride)
1333
1334 if (i == 0) {
1335 DO_SRC(0);
1336 } else {
1337 DO_SRC(1);
1338 }
1339 #undef DO_SRC
1340
1341 unsigned num_vstride = exec_size / width;
1342 unsigned num_hstride = width;
1343 unsigned vstride_elements = (num_vstride - 1) * vstride;
1344 unsigned hstride_elements = (num_hstride - 1) * hstride;
1345 unsigned offset = (vstride_elements + hstride_elements) * element_size +
1346 subreg;
1347 ERROR_IF(offset >= 64 * reg_unit(devinfo),
1348 "A source cannot span more than 2 adjacent GRF registers");
1349 }
1350
1351 if (desc->ndst == 0 || dst_is_null(devinfo, inst))
1352 return error_msg;
1353
1354 unsigned stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1355 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1356 unsigned element_size = elk_reg_type_to_size(dst_type);
1357 unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
1358 unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
1359 ERROR_IF(offset >= 64 * reg_unit(devinfo),
1360 "A destination cannot span more than 2 adjacent GRF registers");
1361
1362 if (error_msg.str)
1363 return error_msg;
1364
1365 /* On IVB/BYT, region parameters and execution size for DF are in terms of
1366 * 32-bit elements, so they are doubled. For evaluating the validity of an
1367 * instruction, we halve them.
1368 */
1369 if (devinfo->verx10 == 70 &&
1370 element_size == 8)
1371 element_size = 4;
1372
1373 align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
1374 exec_size == 1 ? 0 : exec_size * stride,
1375 exec_size == 1 ? 1 : exec_size,
1376 exec_size == 1 ? 0 : stride);
1377
1378 unsigned dst_regs = registers_read(dst_access_mask);
1379 unsigned src0_regs = registers_read(src0_access_mask);
1380 unsigned src1_regs = registers_read(src1_access_mask);
1381
1382 /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
1383 *
1384 * When an instruction has a source region spanning two registers and a
1385 * destination region contained in one register, the number of elements
1386 * must be the same between two sources and one of the following must be
1387 * true:
1388 *
1389 * 1. The destination region is entirely contained in the lower OWord
1390 * of a register.
1391 * 2. The destination region is entirely contained in the upper OWord
1392 * of a register.
1393 * 3. The destination elements are evenly split between the two OWords
1394 * of a register.
1395 */
1396 if (devinfo->ver <= 8) {
1397 if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
1398 unsigned upper_oword_writes = 0, lower_oword_writes = 0;
1399
1400 for (unsigned i = 0; i < exec_size; i++) {
1401 if (dst_access_mask[i] > 0x0000FFFF) {
1402 upper_oword_writes++;
1403 } else {
1404 assert(dst_access_mask[i] != 0);
1405 lower_oword_writes++;
1406 }
1407 }
1408
1409 ERROR_IF(lower_oword_writes != 0 &&
1410 upper_oword_writes != 0 &&
1411 upper_oword_writes != lower_oword_writes,
1412 "Writes must be to only one OWord or "
1413 "evenly split between OWords");
1414 }
1415 }
1416
1417 /* The IVB and HSW PRMs say:
1418 *
1419 * When an instruction has a source region that spans two registers and
1420 * the destination spans two registers, the destination elements must be
1421 * evenly split between the two registers [...]
1422 *
1423 * The SNB PRM contains similar wording (but written in a much more
1424 * confusing manner).
1425 *
1426 * The BDW PRM says:
1427 *
1428 * When destination spans two registers, the source may be one or two
1429 * registers. The destination elements must be evenly split between the
1430 * two registers.
1431 *
1432 * The SKL PRM says:
1433 *
1434 * When destination of MATH instruction spans two registers, the
1435 * destination elements must be evenly split between the two registers.
1436 *
1437    * It is not known whether this restriction applies to KBL or other Gens after
1438 * SKL.
1439 */
1440 if (devinfo->ver <= 8 ||
1441 elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
1442
1443 /* Nothing explicitly states that on Gen < 8 elements must be evenly
1444 * split between two destination registers in the two exceptional
1445 * source-region-spans-one-register cases, but since Broadwell requires
1446 * evenly split writes regardless of source region, we assume that it was
1447 * an oversight and require it.
1448 */
1449 if (dst_regs == 2) {
1450 unsigned upper_reg_writes = 0, lower_reg_writes = 0;
1451
1452 for (unsigned i = 0; i < exec_size; i++) {
1453 if (dst_access_mask[i] > 0xFFFFFFFF) {
1454 upper_reg_writes++;
1455 } else {
1456 assert(dst_access_mask[i] != 0);
1457 lower_reg_writes++;
1458 }
1459 }
1460
1461 ERROR_IF(upper_reg_writes != lower_reg_writes,
1462 "Writes must be evenly split between the two "
1463 "destination registers");
1464 }
1465 }
1466
1467 /* The IVB and HSW PRMs say:
1468 *
1469 * When an instruction has a source region that spans two registers and
1470 * the destination spans two registers, the destination elements must be
1471 * evenly split between the two registers and each destination register
1472 * must be entirely derived from one source register.
1473 *
1474 * Note: In such cases, the regioning parameters must ensure that the
1475 * offset from the two source registers is the same.
1476 *
1477 * The SNB PRM contains similar wording (but written in a much more
1478 * confusing manner).
1479 *
1480 * There are effectively three rules stated here:
1481 *
1482 * For an instruction with a source and a destination spanning two
1483 * registers,
1484 *
1485 * (1) destination elements must be evenly split between the two
1486 * registers
1487 * (2) all destination elements in a register must be derived
1488 * from one source register
1489 * (3) the offset (i.e. the starting location in each of the two
1490 * registers spanned by a region) must be the same in the two
1491 * registers spanned by a region
1492 *
1493 * It is impossible to violate rule (1) without violating (2) or (3), so we
1494 * do not attempt to validate it.
1495 */
1496 if (devinfo->ver <= 7 && dst_regs == 2) {
1497 for (unsigned i = 0; i < num_sources; i++) {
1498 #define DO_SRC(n) \
1499 if (src ## n ## _regs <= 1) \
1500 continue; \
1501 \
1502 for (unsigned i = 0; i < exec_size; i++) { \
1503 if ((dst_access_mask[i] > 0xFFFFFFFF) != \
1504 (src ## n ## _access_mask[i] > 0xFFFFFFFF)) { \
1505 ERROR("Each destination register must be entirely derived " \
1506 "from one source register"); \
1507 break; \
1508 } \
1509 } \
1510 \
1511 unsigned offset_0 = \
1512 elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
1513 unsigned offset_1 = offset_0; \
1514 \
1515 for (unsigned i = 0; i < exec_size; i++) { \
1516 if (src ## n ## _access_mask[i] > 0xFFFFFFFF) { \
1517 offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32; \
1518 break; \
1519 } \
1520 } \
1521 \
1522 ERROR_IF(num_sources == 2 && offset_0 != offset_1, \
1523 "The offset from the two source registers " \
1524 "must be the same")
1525
1526 if (i == 0) {
1527 DO_SRC(0);
1528 } else {
1529 DO_SRC(1);
1530 }
1531 #undef DO_SRC
1532 }
1533 }
1534
1535 /* The IVB and HSW PRMs say:
1536 *
1537 * When destination spans two registers, the source MUST span two
1538 * registers. The exception to the above rule:
1539 * 1. When source is scalar, the source registers are not
1540 * incremented.
1541 * 2. When source is packed integer Word and destination is packed
1542    *         integer DWord, the source register is not incremented but the
1543 * source sub register is incremented.
1544 *
1545 * The SNB PRM does not contain this rule, but the internal documentation
1546 * indicates that it applies to SNB as well. We assume that the rule applies
1547 * to Gen <= 5 although their PRMs do not state it.
1548 *
1549 * While the documentation explicitly says in exception (2) that the
1550 * destination must be an integer DWord, the hardware allows at least a
1551 * float destination type as well. We emit such instructions from
1552 *
1553 * elk_fs_visitor::emit_interpolation_setup_gfx6
1554 * elk_fs_visitor::emit_fragcoord_interpolation
1555 *
1556 * and have for years with no ill effects.
1557 *
1558 * Additionally the simulator source code indicates that the real condition
1559 * is that the size of the destination type is 4 bytes.
1560 *
1561 * HSW PRMs also add a note to the second exception:
1562 * "When lower 8 channels are disabled, the sub register of source1
1563 * operand is not incremented. If the lower 8 channels are expected
1564 * to be disabled, say by predication, the instruction must be split
1565 * into pair of simd8 operations."
1566 *
1567 * We can't reliably know if the channels won't be disabled due to,
1568 * for example, IMASK. So, play it safe and disallow the packed-word
1569 * exception for src1.
1570 */
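   /* Illustrative sketch (not taken from the PRM): a SIMD16
    *
    *    mov(16)   r2.0<1>:d   r4.0<8;8,1>:w
    *
    * has a two-register dword destination but a one-register packed-word
    * source; exception (2) covers it when the word region is src0, so it is
    * not flagged. The same region used as src1 of a two-source instruction
    * is rejected below, per the HSW note above.
    */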
1571 if (devinfo->ver <= 7 && dst_regs == 2) {
1572 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1573 bool dst_is_packed_dword =
1574 is_packed(exec_size * stride, exec_size, stride) &&
1575 elk_reg_type_to_size(dst_type) == 4;
1576
1577 for (unsigned i = 0; i < num_sources; i++) {
1578 #define DO_SRC(n) \
1579 unsigned vstride, width, hstride; \
1580 vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst)); \
1581 width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst)); \
1582 hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst)); \
1583 bool src ## n ## _is_packed_word = \
1584 n != 1 && is_packed(vstride, width, hstride) && \
1585 (elk_inst_src ## n ## _type(devinfo, inst) == ELK_REGISTER_TYPE_W || \
1586 elk_inst_src ## n ## _type(devinfo, inst) == ELK_REGISTER_TYPE_UW); \
1587 \
1588 ERROR_IF(src ## n ## _regs == 1 && \
1589 !src ## n ## _has_scalar_region(devinfo, inst) && \
1590 !(dst_is_packed_dword && src ## n ## _is_packed_word), \
1591 "When the destination spans two registers, the source must " \
1592 "span two registers\n" ERROR_INDENT "(exceptions for scalar " \
1593 "sources, and packed-word to packed-dword expansion for src0)")
1594
1595 if (i == 0) {
1596 DO_SRC(0);
1597 } else {
1598 DO_SRC(1);
1599 }
1600 #undef DO_SRC
1601 }
1602 }
1603
1604 return error_msg;
1605 }
1606
1607 static struct string
1608 vector_immediate_restrictions(const struct elk_isa_info *isa,
1609 const elk_inst *inst)
1610 {
1611 const struct intel_device_info *devinfo = isa->devinfo;
1612
1613 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1614 struct string error_msg = { .str = NULL, .len = 0 };
1615
1616 if (num_sources == 3 || num_sources == 0)
1617 return (struct string){};
1618
1619 unsigned file = num_sources == 1 ?
1620 elk_inst_src0_reg_file(devinfo, inst) :
1621 elk_inst_src1_reg_file(devinfo, inst);
1622 if (file != ELK_IMMEDIATE_VALUE)
1623 return (struct string){};
1624
1625 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1626 unsigned dst_type_size = elk_reg_type_to_size(dst_type);
1627 unsigned dst_subreg = elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1 ?
1628 elk_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1629 unsigned dst_stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1630 enum elk_reg_type type = num_sources == 1 ?
1631 elk_inst_src0_type(devinfo, inst) :
1632 elk_inst_src1_type(devinfo, inst);
1633
1634 /* The PRMs say:
1635 *
1636 * When an immediate vector is used in an instruction, the destination
1637 * must be 128-bit aligned with destination horizontal stride equivalent
1638 * to a word for an immediate integer vector (v) and equivalent to a
1639 * DWord for an immediate float vector (vf).
1640 *
1641 * The text has not been updated for the addition of the immediate unsigned
1642 * integer vector type (uv) on SNB, but presumably the same restriction
1643 * applies.
1644 */
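   /* Illustrative sketch (not taken from the PRM):
    *
    *    mov(8)    r2.0<1>:uw   0x76543210:uv
    *
    * passes: the destination subregister is 128-bit aligned and the
    * destination stride is one word (2 bytes x 1). Writing the same
    * immediate to an r2.0<1>:ud destination (a 4-byte stride) would be
    * flagged below.
    */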
1645 switch (type) {
1646 case ELK_REGISTER_TYPE_V:
1647 case ELK_REGISTER_TYPE_UV:
1648 case ELK_REGISTER_TYPE_VF:
1649 ERROR_IF(dst_subreg % (128 / 8) != 0,
1650 "Destination must be 128-bit aligned in order to use immediate "
1651 "vector types");
1652
1653 if (type == ELK_REGISTER_TYPE_VF) {
1654 ERROR_IF(dst_type_size * dst_stride != 4,
1655 "Destination must have stride equivalent to dword in order "
1656 "to use the VF type");
1657 } else {
1658 ERROR_IF(dst_type_size * dst_stride != 2,
1659 "Destination must have stride equivalent to word in order "
1660 "to use the V or UV type");
1661 }
1662 break;
1663 default:
1664 break;
1665 }
1666
1667 return error_msg;
1668 }
1669
1670 static struct string
1671 special_requirements_for_handling_double_precision_data_types(
1672 const struct elk_isa_info *isa,
1673 const elk_inst *inst)
1674 {
1675 const struct intel_device_info *devinfo = isa->devinfo;
1676
1677 unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1678 struct string error_msg = { .str = NULL, .len = 0 };
1679
1680 if (num_sources == 3 || num_sources == 0)
1681 return (struct string){};
1682
1683 enum elk_reg_type exec_type = execution_type(isa, inst);
1684 unsigned exec_type_size = elk_reg_type_to_size(exec_type);
1685
1686 enum elk_reg_file dst_file = elk_inst_dst_reg_file(devinfo, inst);
1687 enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1688 unsigned dst_type_size = elk_reg_type_to_size(dst_type);
1689 unsigned dst_hstride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1690 unsigned dst_reg = elk_inst_dst_da_reg_nr(devinfo, inst);
1691 unsigned dst_subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
1692 unsigned dst_address_mode = elk_inst_dst_address_mode(devinfo, inst);
1693
1694 bool is_integer_dword_multiply =
1695 devinfo->ver >= 8 &&
1696 elk_inst_opcode(isa, inst) == ELK_OPCODE_MUL &&
1697 (elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_D ||
1698 elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_UD) &&
1699 (elk_inst_src1_type(devinfo, inst) == ELK_REGISTER_TYPE_D ||
1700 elk_inst_src1_type(devinfo, inst) == ELK_REGISTER_TYPE_UD);
1701
1702 const bool is_double_precision =
1703 dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;
1704
1705 for (unsigned i = 0; i < num_sources; i++) {
1706 unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
1707 bool is_scalar_region;
1708 enum elk_reg_file file;
1709 enum elk_reg_type type;
1710
1711 #define DO_SRC(n) \
1712 if (elk_inst_src ## n ## _reg_file(devinfo, inst) == \
1713 ELK_IMMEDIATE_VALUE) \
1714 continue; \
1715 \
1716 is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst); \
1717 vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst)); \
1718 width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst)); \
1719 hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst)); \
1720 file = elk_inst_src ## n ## _reg_file(devinfo, inst); \
1721 type = elk_inst_src ## n ## _type(devinfo, inst); \
1722 type_size = elk_reg_type_to_size(type); \
1723 reg = elk_inst_src ## n ## _da_reg_nr(devinfo, inst); \
1724 subreg = elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst); \
1725 address_mode = elk_inst_src ## n ## _address_mode(devinfo, inst)
1726
1727 if (i == 0) {
1728 DO_SRC(0);
1729 } else {
1730 DO_SRC(1);
1731 }
1732 #undef DO_SRC
1733
1734 const unsigned src_stride = (hstride ? hstride : vstride) * type_size;
1735 const unsigned dst_stride = dst_hstride * dst_type_size;
1736
1737 /* The PRMs say that for CHV, BXT:
1738 *
1739 * When source or destination datatype is 64b or operation is integer
1740 * DWord multiply, regioning in Align1 must follow these rules:
1741 *
1742 * 1. Source and Destination horizontal stride must be aligned to the
1743 * same qword.
1744 * 2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
1745 * 3. Source and Destination offset must be the same, except the case
1746 * of scalar source.
1747 */
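      /* Illustrative sketch (not taken from the PRM): on CHV, an Align1
       *
       *    mov(4)   r2.0<1>:df   r4.0<4;4,1>:df
       *
       * passes: source and destination strides are both 8 bytes (equal and
       * qword aligned), VertStride (4) equals Width (4) x HorzStride (1),
       * and both offsets are subregister 0. A packed :d source against the
       * :df destination has a 4-byte stride and would trip rule 1 unless it
       * uses a horizontal stride of 2.
       */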
1748 if (is_double_precision &&
1749 elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1 &&
1750 devinfo->platform == INTEL_PLATFORM_CHV) {
1751 ERROR_IF(!is_scalar_region &&
1752 (src_stride % 8 != 0 ||
1753 dst_stride % 8 != 0 ||
1754 src_stride != dst_stride),
1755 "Source and destination horizontal stride must equal and a "
1756 "multiple of a qword when the execution type is 64-bit");
1757
1758 ERROR_IF(vstride != width * hstride,
1759 "Vstride must be Width * Hstride when the execution type is "
1760 "64-bit");
1761
1762 ERROR_IF(!is_scalar_region && dst_subreg != subreg,
1763 "Source and destination offset must be the same when the "
1764 "execution type is 64-bit");
1765 }
1766
1767 /* The PRMs say that for CHV, BXT:
1768 *
1769 * When source or destination datatype is 64b or operation is integer
1770 * DWord multiply, indirect addressing must not be used.
1771 */
1772 if (is_double_precision && devinfo->platform == INTEL_PLATFORM_CHV) {
1773 ERROR_IF(ELK_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
1774 ELK_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
1775 "Indirect addressing is not allowed when the execution type "
1776 "is 64-bit");
1777 }
1778
1779 /* The PRMs say that for CHV, BXT:
1780 *
1781 * ARF registers must never be used with 64b datatype or when
1782 * operation is integer DWord multiply.
1783 *
1784 * We assume that the restriction does not apply to the null register.
1785 */
1786 if (is_double_precision && devinfo->platform == INTEL_PLATFORM_CHV) {
1787 ERROR_IF(elk_inst_opcode(isa, inst) == ELK_OPCODE_MAC ||
1788 elk_inst_acc_wr_control(devinfo, inst) ||
1789 (ELK_ARCHITECTURE_REGISTER_FILE == file &&
1790 reg != ELK_ARF_NULL) ||
1791 (ELK_ARCHITECTURE_REGISTER_FILE == dst_file &&
1792 dst_reg != ELK_ARF_NULL),
1793 "Architecture registers cannot be used when the execution "
1794 "type is 64-bit");
1795 }
1796 }
1797
1798 /* The PRMs say that for BDW, SKL:
1799 *
1800 * If Align16 is required for an operation with QW destination and non-QW
1801 * source datatypes, the execution size cannot exceed 2.
1802 *
1803 * We assume that the restriction applies to all Gfx8+ parts.
1804 */
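   /* Illustrative sketch (not taken from the PRM): an Align16 conversion
    * from an :f source to a :df destination at ExecSize 4 would be flagged
    * by the check below; the same conversion at ExecSize 2, or split into
    * two ExecSize-2 halves, is allowed.
    */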
1805 if (is_double_precision && devinfo->ver >= 8) {
1806 enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
1807 enum elk_reg_type src1_type =
1808 num_sources > 1 ? elk_inst_src1_type(devinfo, inst) : src0_type;
1809 unsigned src0_type_size = elk_reg_type_to_size(src0_type);
1810 unsigned src1_type_size = elk_reg_type_to_size(src1_type);
1811
1812 ERROR_IF(elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16 &&
1813 dst_type_size == 8 &&
1814 (src0_type_size != 8 || src1_type_size != 8) &&
1815 elk_inst_exec_size(devinfo, inst) > ELK_EXECUTE_2,
1816 "In Align16 exec size cannot exceed 2 with a QWord destination "
1817 "and a non-QWord source");
1818 }
1819
1820 /* The PRMs say that for CHV, BXT:
1821 *
1822 * When source or destination datatype is 64b or operation is integer
1823 * DWord multiply, DepCtrl must not be used.
1824 */
1825 if (is_double_precision && devinfo->platform == INTEL_PLATFORM_CHV) {
1826 ERROR_IF(elk_inst_no_dd_check(devinfo, inst) ||
1827 elk_inst_no_dd_clear(devinfo, inst),
1828 "DepCtrl is not allowed when the execution type is 64-bit");
1829 }
1830
1831 return error_msg;
1832 }
1833
1834 static struct string
1835 instruction_restrictions(const struct elk_isa_info *isa,
1836 const elk_inst *inst)
1837 {
1838 const struct intel_device_info *devinfo = isa->devinfo;
1839 struct string error_msg = { .str = NULL, .len = 0 };
1840
1841 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_CMP ||
1842 elk_inst_opcode(isa, inst) == ELK_OPCODE_CMPN) {
1843 if (devinfo->ver <= 7) {
1844 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
1845 * ISA) says:
1846 *
1847 * Accumulator cannot be destination, implicit or explicit. The
1848 * destination must be a general register or the null register.
1849 *
1850 * Page 77 of the Haswell PRM Volume 2b contains the same text. The
1851 * 965G PRMs contain similar text.
1852 *
1853 * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
1854 *
1855 * For the cmp and cmpn instructions, remove the accumulator
1856 * restrictions.
1857 */
1858 ERROR_IF(elk_inst_dst_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
1859 elk_inst_dst_da_reg_nr(devinfo, inst) != ELK_ARF_NULL,
1860 "Accumulator cannot be destination, implicit or explicit.");
1861 }
1862
1863 /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
1864 * says:
1865 *
1866 * If the destination is the null register, the {Switch} instruction
1867 * option must be used.
1868 *
1869 * Page 77 of the Haswell PRM Volume 2b contains the same text.
1870 */
1871 if (devinfo->ver == 7) {
1872 ERROR_IF(dst_is_null(devinfo, inst) &&
1873 elk_inst_thread_control(devinfo, inst) != ELK_THREAD_SWITCH,
1874 "If the destination is the null register, the {Switch} "
1875 "instruction option must be used.");
1876 }
1877
1878 ERROR_IF(elk_inst_cond_modifier(devinfo, inst) == ELK_CONDITIONAL_NONE,
1879 "CMP (or CMPN) must have a condition.");
1880 }
1881
1882 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_SEL) {
1883 if (devinfo->ver < 6) {
1884 ERROR_IF(elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE,
1885 "SEL must not have a condition modifier");
1886 ERROR_IF(elk_inst_pred_control(devinfo, inst) == ELK_PREDICATE_NONE,
1887 "SEL must be predicated");
1888 } else {
1889 ERROR_IF((elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE) ==
1890 (elk_inst_pred_control(devinfo, inst) != ELK_PREDICATE_NONE),
1891 "SEL must either be predicated or have a condition modifiers");
1892 }
1893 }
1894
1895 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MUL) {
1896 const enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
1897 const enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
1898 const enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1899
1900 if (devinfo->ver == 6) {
1901 /* Page 223 of the Sandybridge PRM volume 4 part 2 says:
1902 *
1903 * [DevSNB]: When multiple (sic) a DW and a W, the W has to be on
1904 * src0, and the DW has to be on src1.
1905 *
1906 * This text appears only in the Sandybridge PRM.
1907 */
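         /* Illustrative sketch: on SNB, mul(8) r2<1>:d r4<8;8,1>:w
          * r6<8;8,1>:d is accepted, while putting the :d operand on src0
          * and the :w operand on src1 is flagged below.
          */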
1908 ERROR_IF(elk_reg_type_is_integer(src0_type) &&
1909 type_sz(src0_type) == 4 && type_sz(src1_type) < 4,
1910 "When multiplying a DW and any lower precision integer, the "
1911 "DW operand must be src1.");
1912 } else if (devinfo->ver >= 7) {
1913 /* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
1914 *
1915 * When multiplying a DW and any lower precision integer, the DW
1916 * operand must on src0.
1917 *
1918 * Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
1919 * text.
1920 */
1921 ERROR_IF(elk_reg_type_is_integer(src1_type) &&
1922 type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
1923 "When multiplying a DW and any lower precision integer, the "
1924 "DW operand must be src0.");
1925 }
1926
1927 if (devinfo->ver <= 7) {
1928 /* Section 14.2.28 of Intel 965 Express Chipset PRM volume 4 says:
1929 *
1930 * Source operands cannot be an accumulator register.
1931 *
1932 * Iron Lake, Sandybridge, and Ivy Bridge PRMs have the same text.
1933 * Haswell does not. Given that later PRMs have different
1934 * restrictions on accumulator sources (see below), it seems most
1935 * likely that Haswell shares the Ivy Bridge restriction.
1936 */
1937 ERROR_IF(src0_is_acc(devinfo, inst) || src1_is_acc(devinfo, inst),
1938 "Source operands cannot be an accumulator register.");
1939 } else {
1940 /* Page 971 (page 987 of the PDF), section "Accumulator
1941 * Restrictions," of the Broadwell PRM volume 7 says:
1942 *
1943 * Integer source operands cannot be accumulators.
1944 *
1945 * The Skylake and Ice Lake PRMs contain the same text.
1946 */
1947 ERROR_IF((src0_is_acc(devinfo, inst) &&
1948 elk_reg_type_is_integer(src0_type)) ||
1949 (src1_is_acc(devinfo, inst) &&
1950 elk_reg_type_is_integer(src1_type)),
1951 "Integer source operands cannot be accumulators.");
1952 }
1953
1954 if (devinfo->ver <= 6) {
1955 /* Page 223 of the Sandybridge PRM volume 4 part 2 says:
1956 *
1957 * Dword integer source is not allowed for this instruction in
1958 * float execution mode. In other words, if one source is of type
1959 * float (:f, :vf), the other source cannot be of type dword
1960 * integer (:ud or :d).
1961 *
1962 * G965 and Iron Lake PRMs have similar text. Later GPUs do not
1963 * allow mixed source types at all, but that restriction should be
1964 * handled elsewhere.
1965 */
1966 ERROR_IF(execution_type(isa, inst) == ELK_REGISTER_TYPE_F &&
1967 (src0_type == ELK_REGISTER_TYPE_UD ||
1968 src0_type == ELK_REGISTER_TYPE_D ||
1969 src1_type == ELK_REGISTER_TYPE_UD ||
1970 src1_type == ELK_REGISTER_TYPE_D),
1971 "Dword integer source is not allowed for this instruction in"
1972 "float execution mode.");
1973 }
1974
1975 if (devinfo->ver <= 7) {
1976 /* Page 118 of the Haswell PRM volume 2b says:
1977 *
1978 * When operating on integers with at least one of the source
1979 * being a DWord type (signed or unsigned), the destination cannot
1980 * be floating-point (implementation note: the data converter only
1981 * looks at the low 34 bits of the result).
1982 *
1983 * G965, Iron Lake, Sandybridge, and Ivy Bridge have similar text.
1984 * Later GPUs do not allow mixed source and destination types at all,
1985 * but that restriction should be handled elsewhere.
1986 */
1987 ERROR_IF(dst_type == ELK_REGISTER_TYPE_F &&
1988 (src0_type == ELK_REGISTER_TYPE_UD ||
1989 src0_type == ELK_REGISTER_TYPE_D ||
1990 src1_type == ELK_REGISTER_TYPE_UD ||
1991 src1_type == ELK_REGISTER_TYPE_D),
1992 "Float destination type not allowed with DWord source type.");
1993 }
1994
1995 if (devinfo->ver == 8) {
1996 /* Page 966 (page 982 of the PDF) of the Broadwell PRM volume 2a
1997 * says:
1998 *
1999 * When multiplying DW x DW, the dst cannot be accumulator.
2000 *
2001 * This text also appears in the Cherry Trail / Braswell PRM, but it
2002 * does not appear in any other PRM.
2003 */
2004 ERROR_IF((src0_type == ELK_REGISTER_TYPE_UD ||
2005 src0_type == ELK_REGISTER_TYPE_D) &&
2006 (src1_type == ELK_REGISTER_TYPE_UD ||
2007 src1_type == ELK_REGISTER_TYPE_D) &&
2008 elk_inst_dst_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
2009 elk_inst_dst_da_reg_nr(devinfo, inst) != ELK_ARF_NULL,
2010 "When multiplying DW x DW, the dst cannot be accumulator.");
2011 }
2012
2013 /* Page 935 (page 951 of the PDF) of the Ice Lake PRM volume 2a says:
2014 *
2015 * When multiplying integer data types, if one of the sources is a
2016 * DW, the resulting full precision data is stored in the
2017 * accumulator. However, if the destination data type is either W or
2018 * DW, the low bits of the result are written to the destination
2019 * register and the remaining high bits are discarded. This results
2020 * in undefined Overflow and Sign flags. Therefore, conditional
2021 * modifiers and saturation (.sat) cannot be used in this case.
2022 *
2023 * Similar text appears in every version of the PRM.
2024 *
2025 * The wording of the last sentence is not very clear. It could either
2026 * be interpreted as "conditional modifiers combined with saturation
2027 * cannot be used" or "neither conditional modifiers nor saturation can
2028 * be used." I have interpreted it as the latter primarily because that
2029 * is the more restrictive interpretation.
2030 */
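      /* Illustrative sketch (not taken from the PRM):
       *
       *    mul.sat(8)   r2<1>:d   r4<8;8,1>:d   r6<8;8,1>:w
       *
       * keeps only the low bits of the full-precision result in r2, so the
       * Overflow and Sign flags are undefined; the saturate modifier (and
       * any conditional modifier) is therefore rejected below. Dropping the
       * modifiers avoids the error.
       */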
2031 ERROR_IF((src0_type == ELK_REGISTER_TYPE_UD ||
2032 src0_type == ELK_REGISTER_TYPE_D ||
2033 src1_type == ELK_REGISTER_TYPE_UD ||
2034 src1_type == ELK_REGISTER_TYPE_D) &&
2035 (dst_type == ELK_REGISTER_TYPE_UD ||
2036 dst_type == ELK_REGISTER_TYPE_D ||
2037 dst_type == ELK_REGISTER_TYPE_UW ||
2038 dst_type == ELK_REGISTER_TYPE_W) &&
2039 (elk_inst_saturate(devinfo, inst) != 0 ||
2040 elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE),
2041 "Neither Saturate nor conditional modifier allowed with DW "
2042 "integer multiply.");
2043 }
2044
2045 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
2046 unsigned math_function = elk_inst_math_function(devinfo, inst);
2047 switch (math_function) {
2048 case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
2049 case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT:
2050 case ELK_MATH_FUNCTION_INT_DIV_REMAINDER: {
2051 /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says:
2052 * INT DIV function does not support source modifiers.
2053 * Bspec 6647 extends it back to Ivy Bridge.
2054 */
2055 bool src0_valid = !elk_inst_src0_negate(devinfo, inst) &&
2056 !elk_inst_src0_abs(devinfo, inst);
2057 bool src1_valid = !elk_inst_src1_negate(devinfo, inst) &&
2058 !elk_inst_src1_abs(devinfo, inst);
2059 ERROR_IF(!src0_valid || !src1_valid,
2060 "INT DIV function does not support source modifiers.");
2061 break;
2062 }
2063 default:
2064 break;
2065 }
2066 }
2067
2068 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_OR ||
2069 elk_inst_opcode(isa, inst) == ELK_OPCODE_AND ||
2070 elk_inst_opcode(isa, inst) == ELK_OPCODE_XOR ||
2071 elk_inst_opcode(isa, inst) == ELK_OPCODE_NOT) {
2072 if (devinfo->ver >= 8) {
2073 /* While the behavior of the negate source modifier is defined as
2074 * logical not, the behavior of abs source modifier is not
2075 * defined. Disallow it to be safe.
2076 */
2077 ERROR_IF(elk_inst_src0_abs(devinfo, inst),
2078 "Behavior of abs source modifier in logic ops is undefined.");
2079 ERROR_IF(elk_inst_opcode(isa, inst) != ELK_OPCODE_NOT &&
2080 elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
2081 elk_inst_src1_abs(devinfo, inst),
2082 "Behavior of abs source modifier in logic ops is undefined.");
2083
2084 /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
2085 *
2086 * Source modifier is not allowed if source is an accumulator.
2087 *
2088 * The same text also appears for OR, NOT, and XOR instructions.
2089 */
2090 ERROR_IF((elk_inst_src0_abs(devinfo, inst) ||
2091 elk_inst_src0_negate(devinfo, inst)) &&
2092 src0_is_acc(devinfo, inst),
2093 "Source modifier is not allowed if source is an accumulator.");
2094 ERROR_IF(elk_num_sources_from_inst(isa, inst) > 1 &&
2095 (elk_inst_src1_abs(devinfo, inst) ||
2096 elk_inst_src1_negate(devinfo, inst)) &&
2097 src1_is_acc(devinfo, inst),
2098 "Source modifier is not allowed if source is an accumulator.");
2099 }
2100
2101 /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
2102 *
2103 * This operation does not produce sign or overflow conditions. Only
2104 * the .e/.z or .ne/.nz conditional modifiers should be used.
2105 *
2106 * The same text also appears for OR, NOT, and XOR instructions.
2107 *
2108 * Per the comment around nir_op_imod in elk_fs_nir.cpp, we have
2109 * determined this to not be true. The only conditions that seem
2110 * absolutely sketchy are O, R, and U. Some OpenGL shaders from Doom
2111 * 2016 have been observed to generate and.g and operate correctly.
2112 */
2113 const enum elk_conditional_mod cmod =
2114 elk_inst_cond_modifier(devinfo, inst);
2115 ERROR_IF(cmod == ELK_CONDITIONAL_O ||
2116 cmod == ELK_CONDITIONAL_R ||
2117 cmod == ELK_CONDITIONAL_U,
2118 "O, R, and U conditional modifiers should not be used.");
2119 }
2120
2121 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_BFI2) {
2122 ERROR_IF(elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE,
2123 "BFI2 cannot have conditional modifier");
2124
2125 ERROR_IF(elk_inst_saturate(devinfo, inst),
2126 "BFI2 cannot have saturate modifier");
2127
2128 ERROR_IF(elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1,
2129 "BFI2 cannot have Align1");
2130
2131 enum elk_reg_type dst_type = elk_inst_3src_a16_dst_type(devinfo, inst);
2132
2133 ERROR_IF(dst_type != ELK_REGISTER_TYPE_D &&
2134 dst_type != ELK_REGISTER_TYPE_UD,
2135 "BFI2 destination type must be D or UD");
2136
2137 for (unsigned s = 0; s < 3; s++) {
2138 enum elk_reg_type src_type = elk_inst_3src_a16_src_type(devinfo, inst);
2139
2140 ERROR_IF(src_type != dst_type,
2141 "BFI2 source type must match destination type");
2142 }
2143 }
2144
2145 if (elk_inst_opcode(isa, inst) == ELK_OPCODE_CSEL) {
2146 ERROR_IF(elk_inst_pred_control(devinfo, inst) != ELK_PREDICATE_NONE,
2147 "CSEL cannot be predicated");
2148
2149 /* CSEL is CMP and SEL fused into one. The condition modifier, which
2150 * does not actually modify the flags, controls the built-in comparison.
2151 */
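      /* Roughly (sketch, not PRM wording): dst = (src2 <cmod> 0) ? src0 :
       * src1, so a missing condition would make the select meaningless.
       */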
2152 ERROR_IF(elk_inst_cond_modifier(devinfo, inst) == ELK_CONDITIONAL_NONE,
2153 "CSEL must have a condition.");
2154
2155 ERROR_IF(elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1,
2156 "CSEL cannot have Align1.");
2157 enum elk_reg_type dst_type = elk_inst_3src_a16_dst_type(devinfo, inst);
2158
2159 if (devinfo->ver < 8) {
2160 ERROR("CSEL not supported before Gfx8");
2161 } else {
2162 ERROR_IF(dst_type != ELK_REGISTER_TYPE_F &&
2163 dst_type != ELK_REGISTER_TYPE_HF &&
2164 dst_type != ELK_REGISTER_TYPE_D &&
2165 dst_type != ELK_REGISTER_TYPE_W,
2166 "CSEL destination type must be F, HF, D, or W");
2167 }
2168
2169 for (unsigned s = 0; s < 3; s++) {
2170 enum elk_reg_type src_type = elk_inst_3src_a16_src_type(devinfo, inst);
2171
2172 ERROR_IF(src_type != dst_type,
2173 "CSEL source type must match destination type");
2174 }
2175 }
2176
2177 return error_msg;
2178 }
2179
2180 static struct string
2181 send_descriptor_restrictions(const struct elk_isa_info *isa,
2182 const elk_inst *inst)
2183 {
2184 const struct intel_device_info *devinfo = isa->devinfo;
2185 struct string error_msg = { .str = NULL, .len = 0 };
2186
2187 if (inst_is_send(isa, inst)) {
2188 /* We can only validate immediate descriptors */
2189 if (elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE)
2190 return error_msg;
2191 } else {
2192 return error_msg;
2193 }
2194
2195 if (elk_inst_sfid(devinfo, inst) == ELK_SFID_URB) {
2196 /* Gfx4 doesn't have a "header present" bit in the SEND message. */
2197 ERROR_IF(devinfo->ver > 4 && !elk_inst_header_present(devinfo, inst),
2198 "Header must be present for all URB messages.");
2199
2200 switch (elk_inst_urb_opcode(devinfo, inst)) {
2201 case ELK_URB_OPCODE_WRITE_HWORD:
2202 break;
2203
2204 /* case FF_SYNC: */
2205 case ELK_URB_OPCODE_WRITE_OWORD:
2206 /* Gfx5 / Gfx6 FF_SYNC message and Gfx7+ URB_WRITE_OWORD have the
2207 * same opcode value.
2208 */
2209 if (devinfo->ver == 5 || devinfo->ver == 6) {
2210 ERROR_IF(elk_inst_urb_global_offset(devinfo, inst) != 0,
2211 "FF_SYNC global offset must be zero.");
2212 ERROR_IF(elk_inst_urb_swizzle_control(devinfo, inst) != 0,
2213 "FF_SYNC swizzle control must be zero.");
2214 ERROR_IF(elk_inst_urb_used(devinfo, inst) != 0,
2215 "FF_SYNC used must be zero.");
2216 ERROR_IF(elk_inst_urb_complete(devinfo, inst) != 0,
2217 "FF_SYNC complete must be zero.");
2218
2219 /* Volume 4 part 2 of the Sandybridge PRM (page 28) says:
2220 *
2221 * A message response (writeback) length of 1 GRF will be
2222 * indicated on the ‘send’ instruction if the thread requires
2223 * response data and/or synchronization.
2224 */
2225 ERROR_IF((unsigned)elk_inst_rlen(devinfo, inst) > 1,
2226 "FF_SYNC read length must be 0 or 1.");
2227 } else {
2228 ERROR_IF(devinfo->ver < 7,
2229 "URB OWORD write messages only valid on gfx >= 7");
2230 }
2231 break;
2232
2233 case ELK_URB_OPCODE_READ_HWORD:
2234 case ELK_URB_OPCODE_READ_OWORD:
2235 ERROR_IF(devinfo->ver < 7,
2236 "URB read messages only valid on gfx >= 7");
2237 break;
2238
2239 case GFX7_URB_OPCODE_ATOMIC_MOV:
2240 case GFX7_URB_OPCODE_ATOMIC_INC:
2241 ERROR_IF(devinfo->ver < 7,
2242 "URB atomic move and increment messages only valid on gfx >= 7");
2243 break;
2244
2245 case GFX8_URB_OPCODE_ATOMIC_ADD:
2246 /* The Haswell PRM lists this opcode as valid on page 317. */
2247 ERROR_IF(devinfo->verx10 < 75,
2248 "URB atomic add message only valid on gfx >= 7.5");
2249 break;
2250
2251 case GFX8_URB_OPCODE_SIMD8_READ:
2252 ERROR_IF(elk_inst_rlen(devinfo, inst) == 0,
2253 "URB SIMD8 read message must read some data.");
2254 FALLTHROUGH;
2255
2256 case GFX8_URB_OPCODE_SIMD8_WRITE:
2257 ERROR_IF(devinfo->ver < 8,
2258 "URB SIMD8 messages only valid on gfx >= 8");
2259 break;
2260
2261 default:
2262 ERROR_IF(true, "Invalid URB message");
2263 break;
2264 }
2265 }
2266
2267 return error_msg;
2268 }
2269
2270 bool
2271 elk_validate_instruction(const struct elk_isa_info *isa,
2272 const elk_inst *inst, int offset,
2273 unsigned inst_size,
2274 struct elk_disasm_info *disasm)
2275 {
2276 struct string error_msg = { .str = NULL, .len = 0 };
2277
2278 if (is_unsupported_inst(isa, inst)) {
2279 ERROR("Instruction not supported on this Gen");
2280 } else {
2281 CHECK(invalid_values);
2282
2283 if (error_msg.str == NULL) {
2284 CHECK(sources_not_null);
2285 CHECK(send_restrictions);
2286 CHECK(general_restrictions_based_on_operand_types);
2287 CHECK(general_restrictions_on_region_parameters);
2288 CHECK(special_restrictions_for_mixed_float_mode);
2289 CHECK(region_alignment_rules);
2290 CHECK(vector_immediate_restrictions);
2291 CHECK(special_requirements_for_handling_double_precision_data_types);
2292 CHECK(instruction_restrictions);
2293 CHECK(send_descriptor_restrictions);
2294 }
2295 }
2296
2297 if (error_msg.str && disasm) {
2298 elk_disasm_insert_error(disasm, offset, inst_size, error_msg.str);
2299 }
2300 free(error_msg.str);
2301
2302 return error_msg.len == 0;
2303 }
2304
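/* Typical usage (sketch; the surrounding names are assumptions, not part of
 * this file):
 *
 *    if (!elk_validate_instructions(isa, p->store, start_offset,
 *                                   p->next_insn_offset, disasm))
 *       abort();
 *
 * where p->store points at the emitted assembly and disasm may be NULL when
 * no annotated error output is wanted.
 */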
2305 bool
2306 elk_validate_instructions(const struct elk_isa_info *isa,
2307 const void *assembly, int start_offset, int end_offset,
2308 struct elk_disasm_info *disasm)
2309 {
2310 const struct intel_device_info *devinfo = isa->devinfo;
2311 bool valid = true;
2312
2313 for (int src_offset = start_offset; src_offset < end_offset;) {
2314 const elk_inst *inst = assembly + src_offset;
2315 bool is_compact = elk_inst_cmpt_control(devinfo, inst);
2316 unsigned inst_size = is_compact ? sizeof(elk_compact_inst)
2317 : sizeof(elk_inst);
2318 elk_inst uncompacted;
2319
2320 if (is_compact) {
2321 elk_compact_inst *compacted = (void *)inst;
2322 elk_uncompact_instruction(isa, &uncompacted, compacted);
2323 inst = &uncompacted;
2324 }
2325
2326 bool v = elk_validate_instruction(isa, inst, src_offset,
2327 inst_size, disasm);
2328 valid = valid && v;
2329
2330 src_offset += inst_size;
2331 }
2332
2333 return valid;
2334 }
2335