xref: /aosp_15_r20/external/mesa3d/src/intel/compiler/elk/elk_eu_validate.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2015-2019 Intel Corporation
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 /** @file elk_eu_validate.c
25  *
26  * This file implements a pass that validates shader assembly.
27  *
28  * The restrictions implemented herein are intended to verify that instructions
29  * in shader assembly do not violate restrictions documented in the graphics
30  * programming reference manuals.
31  *
32  * The restrictions are difficult for humans to quickly verify due to their
33  * complexity and abundance.
34  *
35  * It is critical that this code is thoroughly unit tested because false
36  * results will lead developers astray, which is worse than having no validator
37  * at all. Functional changes to this file without corresponding unit tests (in
38  * test_eu_validate.cpp) will be rejected.
39  */
40 
41 #include <stdlib.h>
42 #include "elk_eu.h"
43 #include "elk_disasm_info.h"
44 
/* We're going to do lots of string concatenation, so this should help. */

/**
 * A heap-backed string that carries its length alongside the buffer.
 *
 * Callers in this file start from a NULL \c str with \c len == 0; cat()
 * keeps \c str NUL-terminated whenever it stores a buffer.
 */
struct string {
   char *str;
   size_t len;
};

/**
 * Appends \p src to \p dest, growing \p dest's buffer with realloc().
 *
 * \param dest  string to extend; its buffer must be NULL or come from the
 *              malloc() family
 * \param src   bytes to append (\c src.len of them; no terminator needed)
 *
 * If the buffer cannot be grown, \p dest is left exactly as it was (still
 * valid, still owned by the caller).  Assigning realloc()'s result straight
 * back to \c dest->str would instead leak the old buffer and then write
 * through a NULL pointer.
 */
static void
cat(struct string *dest, const struct string src)
{
   const size_t new_len = dest->len + src.len;

   char *grown = realloc(dest->str, new_len + 1);
   if (grown == NULL)
      return;

   /* memcpy() with a NULL source is undefined even for a zero length. */
   if (src.len != 0)
      memcpy(grown + dest->len, src.str, src.len);

   grown[new_len] = '\0';
   dest->str = grown;
   dest->len = new_len;
}
/* Appends the NUL-terminated C string \p src to the struct string \p dest. */
#define CAT(dest, src) cat(&(dest), (struct string){(src), strlen(src)})
60 
61 static bool
contains(const struct string haystack,const struct string needle)62 contains(const struct string haystack, const struct string needle)
63 {
64    return haystack.str && memmem(haystack.str, haystack.len,
65                                  needle.str, needle.len) != NULL;
66 }
67 #define CONTAINS(haystack, needle) \
68    contains(haystack, (struct string){needle, strlen(needle)})
69 
/* Indentation string sized to line up under the text that follows the
 * "\tERROR: " prefix produced by error().
 */
#define ERROR_INDENT "\t       "

/* Wraps a string literal in the validator's error-line format. */
#define error(str)   "\tERROR: " str "\n"

/* Appends the formatted message to the local variable `error_msg` when
 * `cond` holds, unless that exact formatted text is already present in it.
 * Must be used in a scope that declares `struct string error_msg`.
 */
#define ERROR_IF(cond, msg)                          \
   do {                                              \
      if (cond) {                                    \
         if (!CONTAINS(error_msg, error(msg)))       \
            CAT(error_msg, error(msg));              \
      }                                              \
   } while (0)

/* Unconditional form of ERROR_IF(). */
#define ERROR(msg) ERROR_IF(true, msg)
80 
/* Runs one validation function as func(isa, inst, <extra args>) and, when it
 * returns a non-NULL string, appends that text to `error_msg` and frees it.
 * Expects `isa`, `inst` and `error_msg` to be in scope at the point of use.
 */
#define CHECK(func, args...)                                   \
   do {                                                        \
      struct string check_result_ = func(isa, inst, ##args);   \
      if (check_result_.str != NULL) {                         \
         cat(&error_msg, check_result_);                       \
         free(check_result_.str);                              \
      }                                                        \
   } while (0)
89 
/* STRIDE() maps 0 to 0 and any other value n to 1 << (n - 1).
 * WIDTH() maps n to 1 << n.
 *
 * Every use of the argument is parenthesized so that an argument containing
 * a lower-precedence operator (for example a ?: expression) is evaluated as
 * a unit rather than being re-associated with the macro's own operators.
 */
#define STRIDE(stride) ((stride) != 0 ? 1 << ((stride) - 1) : 0)
#define WIDTH(width)   (1 << (width))
92 
93 static bool
inst_is_send(const struct elk_isa_info * isa,const elk_inst * inst)94 inst_is_send(const struct elk_isa_info *isa, const elk_inst *inst)
95 {
96    switch (elk_inst_opcode(isa, inst)) {
97    case ELK_OPCODE_SEND:
98    case ELK_OPCODE_SENDC:
99       return true;
100    default:
101       return false;
102    }
103 }
104 
105 static unsigned
signed_type(unsigned type)106 signed_type(unsigned type)
107 {
108    switch (type) {
109    case ELK_REGISTER_TYPE_UD: return ELK_REGISTER_TYPE_D;
110    case ELK_REGISTER_TYPE_UW: return ELK_REGISTER_TYPE_W;
111    case ELK_REGISTER_TYPE_UB: return ELK_REGISTER_TYPE_B;
112    case ELK_REGISTER_TYPE_UQ: return ELK_REGISTER_TYPE_Q;
113    default:                   return type;
114    }
115 }
116 
117 static bool
inst_is_raw_move(const struct elk_isa_info * isa,const elk_inst * inst)118 inst_is_raw_move(const struct elk_isa_info *isa, const elk_inst *inst)
119 {
120    const struct intel_device_info *devinfo = isa->devinfo;
121 
122    unsigned dst_type = signed_type(elk_inst_dst_type(devinfo, inst));
123    unsigned src_type = signed_type(elk_inst_src0_type(devinfo, inst));
124 
125    if (elk_inst_src0_reg_file(devinfo, inst) == ELK_IMMEDIATE_VALUE) {
126       /* FIXME: not strictly true */
127       if (elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_VF ||
128           elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_UV ||
129           elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_V) {
130          return false;
131       }
132    } else if (elk_inst_src0_negate(devinfo, inst) ||
133               elk_inst_src0_abs(devinfo, inst)) {
134       return false;
135    }
136 
137    return elk_inst_opcode(isa, inst) == ELK_OPCODE_MOV &&
138           elk_inst_saturate(devinfo, inst) == 0 &&
139           dst_type == src_type;
140 }
141 
142 static bool
dst_is_null(const struct intel_device_info * devinfo,const elk_inst * inst)143 dst_is_null(const struct intel_device_info *devinfo, const elk_inst *inst)
144 {
145    return elk_inst_dst_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
146           elk_inst_dst_da_reg_nr(devinfo, inst) == ELK_ARF_NULL;
147 }
148 
149 static bool
src0_is_null(const struct intel_device_info * devinfo,const elk_inst * inst)150 src0_is_null(const struct intel_device_info *devinfo, const elk_inst *inst)
151 {
152    return elk_inst_src0_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT &&
153           elk_inst_src0_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
154           elk_inst_src0_da_reg_nr(devinfo, inst) == ELK_ARF_NULL;
155 }
156 
157 static bool
src1_is_null(const struct intel_device_info * devinfo,const elk_inst * inst)158 src1_is_null(const struct intel_device_info *devinfo, const elk_inst *inst)
159 {
160    return elk_inst_src1_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
161           elk_inst_src1_da_reg_nr(devinfo, inst) == ELK_ARF_NULL;
162 }
163 
164 static bool
src0_is_acc(const struct intel_device_info * devinfo,const elk_inst * inst)165 src0_is_acc(const struct intel_device_info *devinfo, const elk_inst *inst)
166 {
167    return elk_inst_src0_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
168           (elk_inst_src0_da_reg_nr(devinfo, inst) & 0xF0) == ELK_ARF_ACCUMULATOR;
169 }
170 
171 static bool
src1_is_acc(const struct intel_device_info * devinfo,const elk_inst * inst)172 src1_is_acc(const struct intel_device_info *devinfo, const elk_inst *inst)
173 {
174    return elk_inst_src1_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
175           (elk_inst_src1_da_reg_nr(devinfo, inst) & 0xF0) == ELK_ARF_ACCUMULATOR;
176 }
177 
178 static bool
src0_has_scalar_region(const struct intel_device_info * devinfo,const elk_inst * inst)179 src0_has_scalar_region(const struct intel_device_info *devinfo,
180                        const elk_inst *inst)
181 {
182    return elk_inst_src0_vstride(devinfo, inst) == ELK_VERTICAL_STRIDE_0 &&
183           elk_inst_src0_width(devinfo, inst) == ELK_WIDTH_1 &&
184           elk_inst_src0_hstride(devinfo, inst) == ELK_HORIZONTAL_STRIDE_0;
185 }
186 
187 static bool
src1_has_scalar_region(const struct intel_device_info * devinfo,const elk_inst * inst)188 src1_has_scalar_region(const struct intel_device_info *devinfo,
189                        const elk_inst *inst)
190 {
191    return elk_inst_src1_vstride(devinfo, inst) == ELK_VERTICAL_STRIDE_0 &&
192           elk_inst_src1_width(devinfo, inst) == ELK_WIDTH_1 &&
193           elk_inst_src1_hstride(devinfo, inst) == ELK_HORIZONTAL_STRIDE_0;
194 }
195 
196 static struct string
invalid_values(const struct elk_isa_info * isa,const elk_inst * inst)197 invalid_values(const struct elk_isa_info *isa, const elk_inst *inst)
198 {
199    const struct intel_device_info *devinfo = isa->devinfo;
200 
201    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
202    struct string error_msg = { .str = NULL, .len = 0 };
203 
204    switch ((enum elk_execution_size) elk_inst_exec_size(devinfo, inst)) {
205    case ELK_EXECUTE_1:
206    case ELK_EXECUTE_2:
207    case ELK_EXECUTE_4:
208    case ELK_EXECUTE_8:
209    case ELK_EXECUTE_16:
210    case ELK_EXECUTE_32:
211       break;
212    default:
213       ERROR("invalid execution size");
214       break;
215    }
216 
217    if (error_msg.str)
218       return error_msg;
219 
220    if (inst_is_send(isa, inst))
221       return error_msg;
222 
223    if (num_sources == 3) {
224       /* Nothing to test:
225        *    No 3-src instructions on Gfx4-5
226        *    No reg file bits on Gfx6-10 (align16)
227        *    No invalid encodings on Gfx10-12 (align1)
228        */
229    } else {
230       if (devinfo->ver > 6) {
231          ERROR_IF(elk_inst_dst_reg_file(devinfo, inst) == MRF ||
232                   (num_sources > 0 &&
233                    elk_inst_src0_reg_file(devinfo, inst) == MRF) ||
234                   (num_sources > 1 &&
235                    elk_inst_src1_reg_file(devinfo, inst) == MRF),
236                   "invalid register file encoding");
237       }
238    }
239 
240    if (error_msg.str)
241       return error_msg;
242 
243    if (num_sources == 3) {
244       if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
245          ERROR("Align1 mode not allowed on Gen < 10");
246       } else {
247          ERROR_IF(elk_inst_3src_a16_dst_type(devinfo, inst) == INVALID_REG_TYPE ||
248                   elk_inst_3src_a16_src_type(devinfo, inst) == INVALID_REG_TYPE,
249                   "invalid register type encoding");
250       }
251    } else {
252       ERROR_IF(elk_inst_dst_type (devinfo, inst) == INVALID_REG_TYPE ||
253                (num_sources > 0 &&
254                 elk_inst_src0_type(devinfo, inst) == INVALID_REG_TYPE) ||
255                (num_sources > 1 &&
256                 elk_inst_src1_type(devinfo, inst) == INVALID_REG_TYPE),
257                "invalid register type encoding");
258    }
259 
260    return error_msg;
261 }
262 
263 static struct string
sources_not_null(const struct elk_isa_info * isa,const elk_inst * inst)264 sources_not_null(const struct elk_isa_info *isa,
265                  const elk_inst *inst)
266 {
267    const struct intel_device_info *devinfo = isa->devinfo;
268    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
269    struct string error_msg = { .str = NULL, .len = 0 };
270 
271    /* Nothing to test. 3-src instructions can only have GRF sources, and
272     * there's no bit to control the file.
273     */
274    if (num_sources == 3)
275       return (struct string){};
276 
277    if (num_sources >= 1)
278       ERROR_IF(src0_is_null(devinfo, inst), "src0 is null");
279 
280    if (num_sources == 2)
281       ERROR_IF(src1_is_null(devinfo, inst), "src1 is null");
282 
283    return error_msg;
284 }
285 
286 static bool
inst_uses_src_acc(const struct elk_isa_info * isa,const elk_inst * inst)287 inst_uses_src_acc(const struct elk_isa_info *isa,
288                   const elk_inst *inst)
289 {
290    const struct intel_device_info *devinfo = isa->devinfo;
291 
292    /* Check instructions that use implicit accumulator sources */
293    switch (elk_inst_opcode(isa, inst)) {
294    case ELK_OPCODE_MAC:
295    case ELK_OPCODE_MACH:
296    case ELK_OPCODE_SADA2:
297       return true;
298    default:
299       break;
300    }
301 
302    /* FIXME: support 3-src instructions */
303    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
304    assert(num_sources < 3);
305 
306    return src0_is_acc(devinfo, inst) || (num_sources > 1 && src1_is_acc(devinfo, inst));
307 }
308 
309 static struct string
send_restrictions(const struct elk_isa_info * isa,const elk_inst * inst)310 send_restrictions(const struct elk_isa_info *isa,
311                   const elk_inst *inst)
312 {
313    const struct intel_device_info *devinfo = isa->devinfo;
314 
315    struct string error_msg = { .str = NULL, .len = 0 };
316 
317    if (inst_is_send(isa, inst)) {
318       ERROR_IF(elk_inst_src0_address_mode(devinfo, inst) != ELK_ADDRESS_DIRECT,
319                "send must use direct addressing");
320 
321       if (devinfo->ver >= 7) {
322          ERROR_IF(elk_inst_send_src0_reg_file(devinfo, inst) != ELK_GENERAL_REGISTER_FILE,
323                   "send from non-GRF");
324          ERROR_IF(elk_inst_eot(devinfo, inst) &&
325                   elk_inst_src0_da_reg_nr(devinfo, inst) < 112,
326                   "send with EOT must use g112-g127");
327       }
328 
329       if (devinfo->ver >= 8) {
330          ERROR_IF(!dst_is_null(devinfo, inst) &&
331                   (elk_inst_dst_da_reg_nr(devinfo, inst) +
332                    elk_inst_rlen(devinfo, inst) > 127) &&
333                   (elk_inst_src0_da_reg_nr(devinfo, inst) +
334                    elk_inst_mlen(devinfo, inst) >
335                    elk_inst_dst_da_reg_nr(devinfo, inst)),
336                   "r127 must not be used for return address when there is "
337                   "a src and dest overlap");
338       }
339    }
340 
341    return error_msg;
342 }
343 
344 static bool
is_unsupported_inst(const struct elk_isa_info * isa,const elk_inst * inst)345 is_unsupported_inst(const struct elk_isa_info *isa,
346                     const elk_inst *inst)
347 {
348    return elk_inst_opcode(isa, inst) == ELK_OPCODE_ILLEGAL;
349 }
350 
351 /**
352  * Returns whether a combination of two types would qualify as mixed float
353  * operation mode
354  */
355 static inline bool
types_are_mixed_float(enum elk_reg_type t0,enum elk_reg_type t1)356 types_are_mixed_float(enum elk_reg_type t0, enum elk_reg_type t1)
357 {
358    return (t0 == ELK_REGISTER_TYPE_F && t1 == ELK_REGISTER_TYPE_HF) ||
359           (t1 == ELK_REGISTER_TYPE_F && t0 == ELK_REGISTER_TYPE_HF);
360 }
361 
362 static enum elk_reg_type
execution_type_for_type(enum elk_reg_type type)363 execution_type_for_type(enum elk_reg_type type)
364 {
365    switch (type) {
366    case ELK_REGISTER_TYPE_NF:
367    case ELK_REGISTER_TYPE_DF:
368    case ELK_REGISTER_TYPE_F:
369    case ELK_REGISTER_TYPE_HF:
370       return type;
371 
372    case ELK_REGISTER_TYPE_VF:
373       return ELK_REGISTER_TYPE_F;
374 
375    case ELK_REGISTER_TYPE_Q:
376    case ELK_REGISTER_TYPE_UQ:
377       return ELK_REGISTER_TYPE_Q;
378 
379    case ELK_REGISTER_TYPE_D:
380    case ELK_REGISTER_TYPE_UD:
381       return ELK_REGISTER_TYPE_D;
382 
383    case ELK_REGISTER_TYPE_W:
384    case ELK_REGISTER_TYPE_UW:
385    case ELK_REGISTER_TYPE_B:
386    case ELK_REGISTER_TYPE_UB:
387    case ELK_REGISTER_TYPE_V:
388    case ELK_REGISTER_TYPE_UV:
389       return ELK_REGISTER_TYPE_W;
390    }
391    unreachable("not reached");
392 }
393 
394 /**
395  * Returns the execution type of an instruction \p inst
396  */
397 static enum elk_reg_type
execution_type(const struct elk_isa_info * isa,const elk_inst * inst)398 execution_type(const struct elk_isa_info *isa, const elk_inst *inst)
399 {
400    const struct intel_device_info *devinfo = isa->devinfo;
401 
402    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
403    enum elk_reg_type src0_exec_type, src1_exec_type;
404 
405    /* Execution data type is independent of destination data type, except in
406     * mixed F/HF instructions.
407     */
408    enum elk_reg_type dst_exec_type = elk_inst_dst_type(devinfo, inst);
409 
410    src0_exec_type = execution_type_for_type(elk_inst_src0_type(devinfo, inst));
411    if (num_sources == 1) {
412       if (src0_exec_type == ELK_REGISTER_TYPE_HF)
413          return dst_exec_type;
414       return src0_exec_type;
415    }
416 
417    src1_exec_type = execution_type_for_type(elk_inst_src1_type(devinfo, inst));
418    if (types_are_mixed_float(src0_exec_type, src1_exec_type) ||
419        types_are_mixed_float(src0_exec_type, dst_exec_type) ||
420        types_are_mixed_float(src1_exec_type, dst_exec_type)) {
421       return ELK_REGISTER_TYPE_F;
422    }
423 
424    if (src0_exec_type == src1_exec_type)
425       return src0_exec_type;
426 
427    if (src0_exec_type == ELK_REGISTER_TYPE_NF ||
428        src1_exec_type == ELK_REGISTER_TYPE_NF)
429       return ELK_REGISTER_TYPE_NF;
430 
431    /* Mixed operand types where one is float is float on Gen < 6
432     * (and not allowed on later platforms)
433     */
434    if (devinfo->ver < 6 &&
435        (src0_exec_type == ELK_REGISTER_TYPE_F ||
436         src1_exec_type == ELK_REGISTER_TYPE_F))
437       return ELK_REGISTER_TYPE_F;
438 
439    if (src0_exec_type == ELK_REGISTER_TYPE_Q ||
440        src1_exec_type == ELK_REGISTER_TYPE_Q)
441       return ELK_REGISTER_TYPE_Q;
442 
443    if (src0_exec_type == ELK_REGISTER_TYPE_D ||
444        src1_exec_type == ELK_REGISTER_TYPE_D)
445       return ELK_REGISTER_TYPE_D;
446 
447    if (src0_exec_type == ELK_REGISTER_TYPE_W ||
448        src1_exec_type == ELK_REGISTER_TYPE_W)
449       return ELK_REGISTER_TYPE_W;
450 
451    if (src0_exec_type == ELK_REGISTER_TYPE_DF ||
452        src1_exec_type == ELK_REGISTER_TYPE_DF)
453       return ELK_REGISTER_TYPE_DF;
454 
455    unreachable("not reached");
456 }
457 
/**
 * Returns whether a region is packed
 *
 * A region is packed if its elements are adjacent in memory, with no
 * intervening space, no overlap, and no replicated values.
 *
 * Concretely: \p vstride must equal \p width, and \p hstride must be 0 when
 * that common value is 1, or 1 otherwise.
 */
static bool
is_packed(unsigned vstride, unsigned width, unsigned hstride)
{
   if (vstride != width)
      return false;

   const unsigned required_hstride = (vstride == 1) ? 0 : 1;

   return hstride == required_hstride;
}
477 
/**
 * Returns whether a region is linear
 *
 * A region is linear if its elements do not overlap and are not replicated.
 * Unlike a packed region, intervening space (i.e. strided values) is allowed.
 *
 * Concretely: either the region is a single element per row with horizontal
 * stride 0, or \p vstride equals \p width times \p hstride.
 */
static bool
is_linear(unsigned vstride, unsigned width, unsigned hstride)
{
   if (hstride == 0 && width == 1)
      return true;

   return vstride == width * hstride;
}
490 
491 /**
492  * Returns whether an instruction is an explicit or implicit conversion
493  * to/from half-float.
494  */
495 static bool
is_half_float_conversion(const struct elk_isa_info * isa,const elk_inst * inst)496 is_half_float_conversion(const struct elk_isa_info *isa,
497                          const elk_inst *inst)
498 {
499    const struct intel_device_info *devinfo = isa->devinfo;
500 
501    enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
502 
503    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
504    enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
505 
506    if (dst_type != src0_type &&
507        (dst_type == ELK_REGISTER_TYPE_HF || src0_type == ELK_REGISTER_TYPE_HF)) {
508       return true;
509    } else if (num_sources > 1) {
510       enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
511       return dst_type != src1_type &&
512             (dst_type == ELK_REGISTER_TYPE_HF ||
513              src1_type == ELK_REGISTER_TYPE_HF);
514    }
515 
516    return false;
517 }
518 
519 /*
520  * Returns whether an instruction is using mixed float operation mode
521  */
522 static bool
is_mixed_float(const struct elk_isa_info * isa,const elk_inst * inst)523 is_mixed_float(const struct elk_isa_info *isa, const elk_inst *inst)
524 {
525    const struct intel_device_info *devinfo = isa->devinfo;
526 
527    if (devinfo->ver < 8)
528       return false;
529 
530    if (inst_is_send(isa, inst))
531       return false;
532 
533    unsigned opcode = elk_inst_opcode(isa, inst);
534    const struct elk_opcode_desc *desc = elk_opcode_desc(isa, opcode);
535    if (desc->ndst == 0)
536       return false;
537 
538    /* FIXME: support 3-src instructions */
539    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
540    assert(num_sources < 3);
541 
542    enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
543    enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
544 
545    if (num_sources == 1)
546       return types_are_mixed_float(src0_type, dst_type);
547 
548    enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
549 
550    return types_are_mixed_float(src0_type, src1_type) ||
551           types_are_mixed_float(src0_type, dst_type) ||
552           types_are_mixed_float(src1_type, dst_type);
553 }
554 
555 /**
556  * Returns whether an instruction is an explicit or implicit conversion
557  * to/from byte.
558  */
559 static bool
is_byte_conversion(const struct elk_isa_info * isa,const elk_inst * inst)560 is_byte_conversion(const struct elk_isa_info *isa,
561                    const elk_inst *inst)
562 {
563    const struct intel_device_info *devinfo = isa->devinfo;
564 
565    enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
566 
567    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
568    enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
569 
570    if (dst_type != src0_type &&
571        (type_sz(dst_type) == 1 || type_sz(src0_type) == 1)) {
572       return true;
573    } else if (num_sources > 1) {
574       enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
575       return dst_type != src1_type &&
576             (type_sz(dst_type) == 1 || type_sz(src1_type) == 1);
577    }
578 
579    return false;
580 }
581 
582 /**
583  * Checks restrictions listed in "General Restrictions Based on Operand Types"
584  * in the "Register Region Restrictions" section.
585  */
586 static struct string
general_restrictions_based_on_operand_types(const struct elk_isa_info * isa,const elk_inst * inst)587 general_restrictions_based_on_operand_types(const struct elk_isa_info *isa,
588                                             const elk_inst *inst)
589 {
590    const struct intel_device_info *devinfo = isa->devinfo;
591 
592    const struct elk_opcode_desc *desc =
593       elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
594    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
595    unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
596    struct string error_msg = { .str = NULL, .len = 0 };
597 
598    if (inst_is_send(isa, inst))
599       return error_msg;
600 
601    enum elk_reg_type dst_type;
602 
603    if (num_sources == 3) {
604       dst_type = elk_inst_3src_a16_dst_type(devinfo, inst);
605    } else {
606       dst_type = elk_inst_dst_type(devinfo, inst);
607    }
608 
609    ERROR_IF(dst_type == ELK_REGISTER_TYPE_DF &&
610             !devinfo->has_64bit_float,
611             "64-bit float destination, but platform does not support it");
612 
613    ERROR_IF((dst_type == ELK_REGISTER_TYPE_Q ||
614              dst_type == ELK_REGISTER_TYPE_UQ) &&
615             !devinfo->has_64bit_int,
616             "64-bit int destination, but platform does not support it");
617 
618    for (unsigned s = 0; s < num_sources; s++) {
619       enum elk_reg_type src_type;
620       if (num_sources == 3) {
621          src_type = elk_inst_3src_a16_src_type(devinfo, inst);
622       } else {
623          switch (s) {
624          case 0: src_type = elk_inst_src0_type(devinfo, inst); break;
625          case 1: src_type = elk_inst_src1_type(devinfo, inst); break;
626          default: unreachable("invalid src");
627          }
628       }
629 
630       ERROR_IF(src_type == ELK_REGISTER_TYPE_DF &&
631                !devinfo->has_64bit_float,
632                "64-bit float source, but platform does not support it");
633 
634       ERROR_IF((src_type == ELK_REGISTER_TYPE_Q ||
635                 src_type == ELK_REGISTER_TYPE_UQ) &&
636                !devinfo->has_64bit_int,
637                "64-bit int source, but platform does not support it");
638    }
639 
640    if (num_sources == 3)
641       return error_msg;
642 
643    if (exec_size == 1)
644       return error_msg;
645 
646    if (desc->ndst == 0)
647       return error_msg;
648 
649    /* The PRMs say:
650     *
651     *    Where n is the largest element size in bytes for any source or
652     *    destination operand type, ExecSize * n must be <= 64.
653     *
654     * But we do not attempt to enforce it, because it is implied by other
655     * rules:
656     *
657     *    - that the destination stride must match the execution data type
658     *    - sources may not span more than two adjacent GRF registers
659     *    - destination may not span more than two adjacent GRF registers
660     *
661     * In fact, checking it would weaken testing of the other rules.
662     */
663 
664    unsigned dst_stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
665    bool dst_type_is_byte =
666       elk_inst_dst_type(devinfo, inst) == ELK_REGISTER_TYPE_B ||
667       elk_inst_dst_type(devinfo, inst) == ELK_REGISTER_TYPE_UB;
668 
669    if (dst_type_is_byte) {
670       if (is_packed(exec_size * dst_stride, exec_size, dst_stride)) {
671          if (!inst_is_raw_move(isa, inst))
672             ERROR("Only raw MOV supports a packed-byte destination");
673          return error_msg;
674       }
675    }
676 
677    unsigned exec_type = execution_type(isa, inst);
678    unsigned exec_type_size = elk_reg_type_to_size(exec_type);
679    unsigned dst_type_size = elk_reg_type_to_size(dst_type);
680 
681    /* On IVB/BYT, region parameters and execution size for DF are in terms of
682     * 32-bit elements, so they are doubled. For evaluating the validity of an
683     * instruction, we halve them.
684     */
685    if (devinfo->verx10 == 70 &&
686        exec_type_size == 8 && dst_type_size == 4)
687       dst_type_size = 8;
688 
689    if (is_byte_conversion(isa, inst)) {
690       /* From the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
691        *
692        *    "There is no direct conversion from B/UB to DF or DF to B/UB.
693        *     There is no direct conversion from B/UB to Q/UQ or Q/UQ to B/UB."
694        *
695        * Even if these restrictions are listed for the MOV instruction, we
696        * validate this more generally, since there is the possibility
697        * of implicit conversions from other instructions.
698        */
699       enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
700       enum elk_reg_type src1_type = num_sources > 1 ?
701                                     elk_inst_src1_type(devinfo, inst) : 0;
702 
703       ERROR_IF(type_sz(dst_type) == 1 &&
704                (type_sz(src0_type) == 8 ||
705                 (num_sources > 1 && type_sz(src1_type) == 8)),
706                "There are no direct conversions between 64-bit types and B/UB");
707 
708       ERROR_IF(type_sz(dst_type) == 8 &&
709                (type_sz(src0_type) == 1 ||
710                 (num_sources > 1 && type_sz(src1_type) == 1)),
711                "There are no direct conversions between 64-bit types and B/UB");
712    }
713 
714    if (is_half_float_conversion(isa, inst)) {
715       /**
716        * A helper to validate used in the validation of the following restriction
717        * from the BDW+ PRM, Volume 2a, Command Reference, Instructions - MOV:
718        *
719        *    "There is no direct conversion from HF to DF or DF to HF.
720        *     There is no direct conversion from HF to Q/UQ or Q/UQ to HF."
721        *
722        * Even if these restrictions are listed for the MOV instruction, we
723        * validate this more generally, since there is the possibility
724        * of implicit conversions from other instructions, such us implicit
725        * conversion from integer to HF with the ADD instruction in SKL+.
726        */
727       enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
728       enum elk_reg_type src1_type = num_sources > 1 ?
729                                     elk_inst_src1_type(devinfo, inst) : 0;
730       ERROR_IF(dst_type == ELK_REGISTER_TYPE_HF &&
731                (type_sz(src0_type) == 8 ||
732                 (num_sources > 1 && type_sz(src1_type) == 8)),
733                "There are no direct conversions between 64-bit types and HF");
734 
735       ERROR_IF(type_sz(dst_type) == 8 &&
736                (src0_type == ELK_REGISTER_TYPE_HF ||
737                 (num_sources > 1 && src1_type == ELK_REGISTER_TYPE_HF)),
738                "There are no direct conversions between 64-bit types and HF");
739 
740       /* From the BDW+ PRM:
741        *
742        *   "Conversion between Integer and HF (Half Float) must be
743        *    DWord-aligned and strided by a DWord on the destination."
744        *
745        * Also, the above restrictions seems to be expanded on CHV and SKL+ by:
746        *
747        *   "There is a relaxed alignment rule for word destinations. When
748        *    the destination type is word (UW, W, HF), destination data types
749        *    can be aligned to either the lowest word or the second lowest
750        *    word of the execution channel. This means the destination data
751        *    words can be either all in the even word locations or all in the
752        *    odd word locations."
753        *
754        * We do not implement the second rule as is though, since empirical
755        * testing shows inconsistencies:
756        *   - It suggests that packed 16-bit is not allowed, which is not true.
757        *   - It suggests that conversions from Q/DF to W (which need to be
758        *     64-bit aligned on the destination) are not possible, which is
759        *     not true.
760        *
761        * So from this rule we only validate the implication that conversions
762        * from F to HF need to be DWord strided (except in Align1 mixed
763        * float mode where packed fp16 destination is allowed so long as the
764        * destination is oword-aligned).
765        *
766        * Finally, we only validate this for Align1 because Align16 always
767        * requires packed destinations, so these restrictions can't possibly
768        * apply to Align16 mode.
769        */
770       if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1) {
771          if ((dst_type == ELK_REGISTER_TYPE_HF &&
772               (elk_reg_type_is_integer(src0_type) ||
773                (num_sources > 1 && elk_reg_type_is_integer(src1_type)))) ||
774              (elk_reg_type_is_integer(dst_type) &&
775               (src0_type == ELK_REGISTER_TYPE_HF ||
776                (num_sources > 1 && src1_type == ELK_REGISTER_TYPE_HF)))) {
777             ERROR_IF(dst_stride * dst_type_size != 4,
778                      "Conversions between integer and half-float must be "
779                      "strided by a DWord on the destination");
780 
781             unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
782             ERROR_IF(subreg % 4 != 0,
783                      "Conversions between integer and half-float must be "
784                      "aligned to a DWord on the destination");
785          } else if (devinfo->platform == INTEL_PLATFORM_CHV &&
786                     dst_type == ELK_REGISTER_TYPE_HF) {
787             unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
788             ERROR_IF(dst_stride != 2 &&
789                      !(is_mixed_float(isa, inst) &&
790                        dst_stride == 1 && subreg % 16 == 0),
791                      "Conversions to HF must have either all words in even "
792                      "word locations or all words in odd word locations or "
793                      "be mixed-float with Oword-aligned packed destination");
794          }
795       }
796    }
797 
798    /* There are special regioning rules for mixed-float mode in CHV and SKL that
799     * override the general rule for the ratio of sizes of the destination type
800     * and the execution type. We will add validation for those in a later patch.
801     */
802    bool validate_dst_size_and_exec_size_ratio =
803       !is_mixed_float(isa, inst) ||
804       !(devinfo->platform == INTEL_PLATFORM_CHV);
805 
806    if (validate_dst_size_and_exec_size_ratio &&
807        exec_type_size > dst_type_size) {
808       if (!(dst_type_is_byte && inst_is_raw_move(isa, inst))) {
809          ERROR_IF(dst_stride * dst_type_size != exec_type_size,
810                   "Destination stride must be equal to the ratio of the sizes "
811                   "of the execution data type to the destination type");
812       }
813 
814       unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
815 
816       if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1 &&
817           elk_inst_dst_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT) {
818          /* The i965 PRM says:
819           *
820           *    Implementation Restriction: The relaxed alignment rule for byte
821           *    destination (#10.5) is not supported.
822           */
823          if (devinfo->verx10 >= 45 && dst_type_is_byte) {
824             ERROR_IF(subreg % exec_type_size != 0 &&
825                      subreg % exec_type_size != 1,
826                      "Destination subreg must be aligned to the size of the "
827                      "execution data type (or to the next lowest byte for byte "
828                      "destinations)");
829          } else {
830             ERROR_IF(subreg % exec_type_size != 0,
831                      "Destination subreg must be aligned to the size of the "
832                      "execution data type");
833          }
834       }
835    }
836 
837    return error_msg;
838 }
839 
840 /**
841  * Checks restrictions listed in "General Restrictions on Regioning Parameters"
842  * in the "Register Region Restrictions" section.
843  */
844 static struct string
general_restrictions_on_region_parameters(const struct elk_isa_info * isa,const elk_inst * inst)845 general_restrictions_on_region_parameters(const struct elk_isa_info *isa,
846                                           const elk_inst *inst)
847 {
848    const struct intel_device_info *devinfo = isa->devinfo;
849 
850    const struct elk_opcode_desc *desc =
851       elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
852    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
853    unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
854    struct string error_msg = { .str = NULL, .len = 0 };
855 
856    if (num_sources == 3)
857       return (struct string){};
858 
859    if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16) {
860       if (desc->ndst != 0 && !dst_is_null(devinfo, inst))
861          ERROR_IF(elk_inst_dst_hstride(devinfo, inst) != ELK_HORIZONTAL_STRIDE_1,
862                   "Destination Horizontal Stride must be 1");
863 
864       if (num_sources >= 1) {
865          if (devinfo->verx10 >= 75) {
866             ERROR_IF(elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
867                      elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
868                      elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_2 &&
869                      elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
870                      "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
871          } else {
872             ERROR_IF(elk_inst_src0_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
873                      elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
874                      elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
875                      "In Align16 mode, only VertStride of 0 or 4 is allowed");
876          }
877       }
878 
879       if (num_sources == 2) {
880          if (devinfo->verx10 >= 75) {
881             ERROR_IF(elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
882                      elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
883                      elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_2 &&
884                      elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
885                      "In Align16 mode, only VertStride of 0, 2, or 4 is allowed");
886          } else {
887             ERROR_IF(elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
888                      elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_0 &&
889                      elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
890                      "In Align16 mode, only VertStride of 0 or 4 is allowed");
891          }
892       }
893 
894       return error_msg;
895    }
896 
897    for (unsigned i = 0; i < num_sources; i++) {
898       unsigned vstride, width, hstride, element_size, subreg;
899       enum elk_reg_type type;
900 
901 #define DO_SRC(n)                                                              \
902       if (elk_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
903           ELK_IMMEDIATE_VALUE)                                                 \
904          continue;                                                             \
905                                                                                \
906       vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst));          \
907       width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst));               \
908       hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst));          \
909       type = elk_inst_src ## n ## _type(devinfo, inst);                        \
910       element_size = elk_reg_type_to_size(type);                               \
911       subreg = elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst)
912 
913       if (i == 0) {
914          DO_SRC(0);
915       } else {
916          DO_SRC(1);
917       }
918 #undef DO_SRC
919 
920       /* On IVB/BYT, region parameters and execution size for DF are in terms of
921        * 32-bit elements, so they are doubled. For evaluating the validity of an
922        * instruction, we halve them.
923        */
924       if (devinfo->verx10 == 70 &&
925           element_size == 8)
926          element_size = 4;
927 
928       /* ExecSize must be greater than or equal to Width. */
929       ERROR_IF(exec_size < width, "ExecSize must be greater than or equal "
930                                   "to Width");
931 
932       /* If ExecSize = Width and HorzStride ≠ 0,
933        * VertStride must be set to Width * HorzStride.
934        */
935       if (exec_size == width && hstride != 0) {
936          ERROR_IF(vstride != width * hstride,
937                   "If ExecSize = Width and HorzStride ≠ 0, "
938                   "VertStride must be set to Width * HorzStride");
939       }
940 
941       /* If Width = 1, HorzStride must be 0 regardless of the values of
942        * ExecSize and VertStride.
943        */
944       if (width == 1) {
945          ERROR_IF(hstride != 0,
946                   "If Width = 1, HorzStride must be 0 regardless "
947                   "of the values of ExecSize and VertStride");
948       }
949 
950       /* If ExecSize = Width = 1, both VertStride and HorzStride must be 0. */
951       if (exec_size == 1 && width == 1) {
952          ERROR_IF(vstride != 0 || hstride != 0,
953                   "If ExecSize = Width = 1, both VertStride "
954                   "and HorzStride must be 0");
955       }
956 
957       /* If VertStride = HorzStride = 0, Width must be 1 regardless of the
958        * value of ExecSize.
959        */
960       if (vstride == 0 && hstride == 0) {
961          ERROR_IF(width != 1,
962                   "If VertStride = HorzStride = 0, Width must be "
963                   "1 regardless of the value of ExecSize");
964       }
965 
966       /* VertStride must be used to cross GRF register boundaries. This rule
967        * implies that elements within a 'Width' cannot cross GRF boundaries.
968        */
969       const uint64_t mask = (1ULL << element_size) - 1;
970       unsigned rowbase = subreg;
971 
972       for (int y = 0; y < exec_size / width; y++) {
973          uint64_t access_mask = 0;
974          unsigned offset = rowbase;
975 
976          for (int x = 0; x < width; x++) {
977             access_mask |= mask << (offset % 64);
978             offset += hstride * element_size;
979          }
980 
981          rowbase += vstride * element_size;
982 
983          if ((uint32_t)access_mask != 0 && (access_mask >> 32) != 0) {
984             ERROR("VertStride must be used to cross GRF register boundaries");
985             break;
986          }
987       }
988    }
989 
990    /* Dst.HorzStride must not be 0. */
991    if (desc->ndst != 0 && !dst_is_null(devinfo, inst)) {
992       ERROR_IF(elk_inst_dst_hstride(devinfo, inst) == ELK_HORIZONTAL_STRIDE_0,
993                "Destination Horizontal Stride must not be 0");
994    }
995 
996    return error_msg;
997 }
998 
999 static struct string
special_restrictions_for_mixed_float_mode(const struct elk_isa_info * isa,const elk_inst * inst)1000 special_restrictions_for_mixed_float_mode(const struct elk_isa_info *isa,
1001                                           const elk_inst *inst)
1002 {
1003    const struct intel_device_info *devinfo = isa->devinfo;
1004 
1005    struct string error_msg = { .str = NULL, .len = 0 };
1006 
1007    const unsigned opcode = elk_inst_opcode(isa, inst);
1008    const unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1009    if (num_sources >= 3)
1010       return error_msg;
1011 
1012    if (!is_mixed_float(isa, inst))
1013       return error_msg;
1014 
1015    unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
1016    bool is_align16 = elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16;
1017 
1018    enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
1019    enum elk_reg_type src1_type = num_sources > 1 ?
1020                                  elk_inst_src1_type(devinfo, inst) : 0;
1021    enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1022 
1023    unsigned dst_stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1024    bool dst_is_packed = is_packed(exec_size * dst_stride, exec_size, dst_stride);
1025 
1026    /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1027     * Float Operations:
1028     *
1029     *    "Indirect addressing on source is not supported when source and
1030     *     destination data types are mixed float."
1031     */
1032    ERROR_IF(elk_inst_src0_address_mode(devinfo, inst) != ELK_ADDRESS_DIRECT ||
1033             (num_sources > 1 &&
1034              elk_inst_src1_address_mode(devinfo, inst) != ELK_ADDRESS_DIRECT),
1035             "Indirect addressing on source is not supported when source and "
1036             "destination data types are mixed float");
1037 
1038    /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1039     * Float Operations:
1040     *
1041     *    "No SIMD16 in mixed mode when destination is f32. Instruction
1042     *     execution size must be no more than 8."
1043     */
1044    ERROR_IF(exec_size > 8 && dst_type == ELK_REGISTER_TYPE_F,
1045             "Mixed float mode with 32-bit float destination is limited "
1046             "to SIMD8");
1047 
1048    if (is_align16) {
1049       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1050        * Float Operations:
1051        *
1052        *   "In Align16 mode, when half float and float data types are mixed
1053        *    between source operands OR between source and destination operands,
1054        *    the register content are assumed to be packed."
1055        *
1056        * Since Align16 doesn't have a concept of horizontal stride (or width),
1057        * it means that vertical stride must always be 4, since 0 and 2 would
1058        * lead to replicated data, and any other value is disallowed in Align16.
1059        */
1060       ERROR_IF(elk_inst_src0_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
1061                "Align16 mixed float mode assumes packed data (vstride must be 4");
1062 
1063       ERROR_IF(num_sources >= 2 &&
1064                elk_inst_src1_vstride(devinfo, inst) != ELK_VERTICAL_STRIDE_4,
1065                "Align16 mixed float mode assumes packed data (vstride must be 4");
1066 
1067       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1068        * Float Operations:
1069        *
1070        *   "For Align16 mixed mode, both input and output packed f16 data
1071        *    must be oword aligned, no oword crossing in packed f16."
1072        *
1073        * The previous rule requires that Align16 operands are always packed,
1074        * and since there is only one bit for Align16 subnr, which represents
1075        * offsets 0B and 16B, this rule is always enforced and we don't need to
1076        * validate it.
1077        */
1078 
1079       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1080        * Float Operations:
1081        *
1082        *    "No SIMD16 in mixed mode when destination is packed f16 for both
1083        *     Align1 and Align16."
1084        *
1085        * And:
1086        *
1087        *   "In Align16 mode, when half float and float data types are mixed
1088        *    between source operands OR between source and destination operands,
1089        *    the register content are assumed to be packed."
1090        *
1091        * Which implies that SIMD16 is not available in Align16. This is further
1092        * confirmed by:
1093        *
1094        *    "For Align16 mixed mode, both input and output packed f16 data
1095        *     must be oword aligned, no oword crossing in packed f16"
1096        *
1097        * Since oword-aligned packed f16 data would cross oword boundaries when
1098        * the execution size is larger than 8.
1099        */
1100       ERROR_IF(exec_size > 8, "Align16 mixed float mode is limited to SIMD8");
1101 
1102       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1103        * Float Operations:
1104        *
1105        *    "No accumulator read access for Align16 mixed float."
1106        */
1107       ERROR_IF(inst_uses_src_acc(isa, inst),
1108                "No accumulator read access for Align16 mixed float");
1109    } else {
1110       assert(!is_align16);
1111 
1112       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1113        * Float Operations:
1114        *
1115        *    "No SIMD16 in mixed mode when destination is packed f16 for both
1116        *     Align1 and Align16."
1117        */
1118       ERROR_IF(exec_size > 8 && dst_is_packed &&
1119                dst_type == ELK_REGISTER_TYPE_HF,
1120                "Align1 mixed float mode is limited to SIMD8 when destination "
1121                "is packed half-float");
1122 
1123       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1124        * Float Operations:
1125        *
1126        *    "Math operations for mixed mode:
1127        *     - In Align1, f16 inputs need to be strided"
1128        */
1129       if (opcode == ELK_OPCODE_MATH) {
1130          if (src0_type == ELK_REGISTER_TYPE_HF) {
1131             ERROR_IF(STRIDE(elk_inst_src0_hstride(devinfo, inst)) <= 1,
1132                      "Align1 mixed mode math needs strided half-float inputs");
1133          }
1134 
1135          if (num_sources >= 2 && src1_type == ELK_REGISTER_TYPE_HF) {
1136             ERROR_IF(STRIDE(elk_inst_src1_hstride(devinfo, inst)) <= 1,
1137                      "Align1 mixed mode math needs strided half-float inputs");
1138          }
1139       }
1140 
1141       if (dst_type == ELK_REGISTER_TYPE_HF && dst_stride == 1) {
1142          /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1143           * Float Operations:
1144           *
1145           *    "In Align1, destination stride can be smaller than execution
1146           *     type. When destination is stride of 1, 16 bit packed data is
1147           *     updated on the destination. However, output packed f16 data
1148           *     must be oword aligned, no oword crossing in packed f16."
1149           *
1150           * The requirement of not crossing oword boundaries for 16-bit oword
1151           * aligned data means that execution size is limited to 8.
1152           */
1153          unsigned subreg;
1154          if (elk_inst_dst_address_mode(devinfo, inst) == ELK_ADDRESS_DIRECT)
1155             subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
1156          else
1157             subreg = elk_inst_dst_ia_subreg_nr(devinfo, inst);
1158          ERROR_IF(subreg % 16 != 0,
1159                   "Align1 mixed mode packed half-float output must be "
1160                   "oword aligned");
1161          ERROR_IF(exec_size > 8,
1162                   "Align1 mixed mode packed half-float output must not "
1163                   "cross oword boundaries (max exec size is 8)");
1164 
1165          /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1166           * Float Operations:
1167           *
1168           *    "When source is float or half float from accumulator register and
1169           *     destination is half float with a stride of 1, the source must
1170           *     register aligned. i.e., source must have offset zero."
1171           *
1172           * Align16 mixed float mode doesn't allow accumulator access on sources,
1173           * so we only need to check this for Align1.
1174           */
1175          if (src0_is_acc(devinfo, inst) &&
1176              (src0_type == ELK_REGISTER_TYPE_F ||
1177               src0_type == ELK_REGISTER_TYPE_HF)) {
1178             ERROR_IF(elk_inst_src0_da1_subreg_nr(devinfo, inst) != 0,
1179                      "Mixed float mode requires register-aligned accumulator "
1180                      "source reads when destination is packed half-float");
1181 
1182          }
1183 
1184          if (num_sources > 1 &&
1185              src1_is_acc(devinfo, inst) &&
1186              (src1_type == ELK_REGISTER_TYPE_F ||
1187               src1_type == ELK_REGISTER_TYPE_HF)) {
1188             ERROR_IF(elk_inst_src1_da1_subreg_nr(devinfo, inst) != 0,
1189                      "Mixed float mode requires register-aligned accumulator "
1190                      "source reads when destination is packed half-float");
1191          }
1192       }
1193 
1194       /* From the SKL PRM, Special Restrictions for Handling Mixed Mode
1195        * Float Operations:
1196        *
1197        *    "No swizzle is allowed when an accumulator is used as an implicit
1198        *     source or an explicit source in an instruction. i.e. when
1199        *     destination is half float with an implicit accumulator source,
1200        *     destination stride needs to be 2."
1201        *
1202        * FIXME: it is not quite clear what the first sentence actually means
1203        *        or its link to the implication described after it, so we only
1204        *        validate the explicit implication, which is clearly described.
1205        */
1206       if (dst_type == ELK_REGISTER_TYPE_HF &&
1207           inst_uses_src_acc(isa, inst)) {
1208          ERROR_IF(dst_stride != 2,
1209                   "Mixed float mode with implicit/explicit accumulator "
1210                   "source and half-float destination requires a stride "
1211                   "of 2 on the destination");
1212       }
1213    }
1214 
1215    return error_msg;
1216 }
1217 
/**
 * Fills \p access_mask with one byte-mask per execution channel for an Align1
 * region described by \p subreg, \p vstride, \p width and \p hstride.
 *
 * \p access_mask is a 32-element array of uint64_t. Entry N receives a bitmask
 * in which bit B is set when channel N touches byte B (byte offsets are taken
 * modulo 64). Entries past the last channel are left untouched, so the caller
 * is expected to clear the array beforehand.
 *
 * Example: the source gX.1<4,2,2>F in an exec_size = 4 instruction gives
 *
 *    access_mask[0] = 0x00000000000000F0
 *    access_mask[1] = 0x000000000000F000
 *    access_mask[2] = 0x0000000000F00000
 *    access_mask[3] = 0x00000000F0000000
 *
 * since channel 0 touches bytes 7-4, channel 1 touches bytes 15-12, and so on.
 *
 * When \p width exceeds \p exec_size no row is generated and nothing is
 * written.
 */
static void
align1_access_mask(uint64_t access_mask[static 32],
                   unsigned exec_size, unsigned element_size, unsigned subreg,
                   unsigned vstride, unsigned width, unsigned hstride)
{
   /* One bit per byte of a single element. */
   const uint64_t element_bytes = (1ULL << element_size) - 1;
   const unsigned num_rows = exec_size / width;
   unsigned channel = 0;

   for (unsigned row = 0; row < num_rows; row++) {
      /* Rows are vstride elements apart, starting at the sub-register. */
      const unsigned row_start = subreg + row * vstride * element_size;

      for (unsigned col = 0; col < width; col++) {
         /* Elements within a row are hstride elements apart. */
         const unsigned byte = row_start + col * hstride * element_size;

         access_mask[channel++] = element_bytes << (byte % 64);
      }
   }

   /* Either no row was produced or every channel got exactly one mask. */
   assert(channel == 0 || channel == exec_size);
}
1258 
/**
 * Reports how many registers the 32-entry \p access_mask touches.
 *
 * \return 2 as soon as any entry has a bit set above bit 31, otherwise 1 if
 *         at least one entry is non-zero, otherwise 0.
 */
static int
registers_read(const uint64_t access_mask[static 32])
{
   uint64_t seen = 0;

   for (const uint64_t *entry = access_mask; entry != access_mask + 32;
        entry++) {
      /* Any byte in the upper 32 bits belongs to the second register. */
      if ((*entry >> 32) != 0)
         return 2;

      seen |= *entry;
   }

   return seen != 0 ? 1 : 0;
}
1277 
1278 /**
1279  * Checks restrictions listed in "Region Alignment Rules" in the "Register
1280  * Region Restrictions" section.
1281  */
1282 static struct string
region_alignment_rules(const struct elk_isa_info * isa,const elk_inst * inst)1283 region_alignment_rules(const struct elk_isa_info *isa,
1284                        const elk_inst *inst)
1285 {
1286    const struct intel_device_info *devinfo = isa->devinfo;
1287    const struct elk_opcode_desc *desc =
1288       elk_opcode_desc(isa, elk_inst_opcode(isa, inst));
1289    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1290    unsigned exec_size = 1 << elk_inst_exec_size(devinfo, inst);
1291    uint64_t dst_access_mask[32], src0_access_mask[32], src1_access_mask[32];
1292    struct string error_msg = { .str = NULL, .len = 0 };
1293 
1294    if (num_sources == 3)
1295       return (struct string){};
1296 
1297    if (elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16)
1298       return (struct string){};
1299 
1300    if (inst_is_send(isa, inst))
1301       return (struct string){};
1302 
1303    memset(dst_access_mask, 0, sizeof(dst_access_mask));
1304    memset(src0_access_mask, 0, sizeof(src0_access_mask));
1305    memset(src1_access_mask, 0, sizeof(src1_access_mask));
1306 
1307    for (unsigned i = 0; i < num_sources; i++) {
1308       unsigned vstride, width, hstride, element_size, subreg;
1309       enum elk_reg_type type;
1310 
1311       /* In Direct Addressing mode, a source cannot span more than 2 adjacent
1312        * GRF registers.
1313        */
1314 
1315 #define DO_SRC(n)                                                              \
1316       if (elk_inst_src ## n ## _address_mode(devinfo, inst) !=                 \
1317           ELK_ADDRESS_DIRECT)                                                  \
1318          continue;                                                             \
1319                                                                                \
1320       if (elk_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
1321           ELK_IMMEDIATE_VALUE)                                                 \
1322          continue;                                                             \
1323                                                                                \
1324       vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst));          \
1325       width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst));               \
1326       hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst));          \
1327       type = elk_inst_src ## n ## _type(devinfo, inst);                        \
1328       element_size = elk_reg_type_to_size(type);                               \
1329       subreg = elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
1330       align1_access_mask(src ## n ## _access_mask,                             \
1331                          exec_size, element_size, subreg,                      \
1332                          vstride, width, hstride)
1333 
1334       if (i == 0) {
1335          DO_SRC(0);
1336       } else {
1337          DO_SRC(1);
1338       }
1339 #undef DO_SRC
1340 
1341       unsigned num_vstride = exec_size / width;
1342       unsigned num_hstride = width;
1343       unsigned vstride_elements = (num_vstride - 1) * vstride;
1344       unsigned hstride_elements = (num_hstride - 1) * hstride;
1345       unsigned offset = (vstride_elements + hstride_elements) * element_size +
1346                         subreg;
1347       ERROR_IF(offset >= 64 * reg_unit(devinfo),
1348                "A source cannot span more than 2 adjacent GRF registers");
1349    }
1350 
1351    if (desc->ndst == 0 || dst_is_null(devinfo, inst))
1352       return error_msg;
1353 
1354    unsigned stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1355    enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1356    unsigned element_size = elk_reg_type_to_size(dst_type);
1357    unsigned subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
1358    unsigned offset = ((exec_size - 1) * stride * element_size) + subreg;
1359    ERROR_IF(offset >= 64 * reg_unit(devinfo),
1360             "A destination cannot span more than 2 adjacent GRF registers");
1361 
1362    if (error_msg.str)
1363       return error_msg;
1364 
1365    /* On IVB/BYT, region parameters and execution size for DF are in terms of
1366     * 32-bit elements, so they are doubled. For evaluating the validity of an
1367     * instruction, we halve them.
1368     */
1369    if (devinfo->verx10 == 70 &&
1370        element_size == 8)
1371       element_size = 4;
1372 
1373    align1_access_mask(dst_access_mask, exec_size, element_size, subreg,
1374                       exec_size == 1 ? 0 : exec_size * stride,
1375                       exec_size == 1 ? 1 : exec_size,
1376                       exec_size == 1 ? 0 : stride);
1377 
1378    unsigned dst_regs = registers_read(dst_access_mask);
1379    unsigned src0_regs = registers_read(src0_access_mask);
1380    unsigned src1_regs = registers_read(src1_access_mask);
1381 
1382    /* The SNB, IVB, HSW, BDW, and CHV PRMs say:
1383     *
1384     *    When an instruction has a source region spanning two registers and a
1385     *    destination region contained in one register, the number of elements
1386     *    must be the same between two sources and one of the following must be
1387     *    true:
1388     *
1389     *       1. The destination region is entirely contained in the lower OWord
1390     *          of a register.
1391     *       2. The destination region is entirely contained in the upper OWord
1392     *          of a register.
1393     *       3. The destination elements are evenly split between the two OWords
1394     *          of a register.
1395     */
1396    if (devinfo->ver <= 8) {
1397       if (dst_regs == 1 && (src0_regs == 2 || src1_regs == 2)) {
1398          unsigned upper_oword_writes = 0, lower_oword_writes = 0;
1399 
1400          for (unsigned i = 0; i < exec_size; i++) {
1401             if (dst_access_mask[i] > 0x0000FFFF) {
1402                upper_oword_writes++;
1403             } else {
1404                assert(dst_access_mask[i] != 0);
1405                lower_oword_writes++;
1406             }
1407          }
1408 
1409          ERROR_IF(lower_oword_writes != 0 &&
1410                   upper_oword_writes != 0 &&
1411                   upper_oword_writes != lower_oword_writes,
1412                   "Writes must be to only one OWord or "
1413                   "evenly split between OWords");
1414       }
1415    }
1416 
1417    /* The IVB and HSW PRMs say:
1418     *
1419     *    When an instruction has a source region that spans two registers and
1420     *    the destination spans two registers, the destination elements must be
1421     *    evenly split between the two registers [...]
1422     *
1423     * The SNB PRM contains similar wording (but written in a much more
1424     * confusing manner).
1425     *
1426     * The BDW PRM says:
1427     *
1428     *    When destination spans two registers, the source may be one or two
1429     *    registers. The destination elements must be evenly split between the
1430     *    two registers.
1431     *
1432     * The SKL PRM says:
1433     *
1434     *    When destination of MATH instruction spans two registers, the
1435     *    destination elements must be evenly split between the two registers.
1436     *
1437     * It is not known whether this restriction applies to KBL other Gens after
1438     * SKL.
1439     */
1440    if (devinfo->ver <= 8 ||
1441        elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
1442 
1443       /* Nothing explicitly states that on Gen < 8 elements must be evenly
1444        * split between two destination registers in the two exceptional
1445        * source-region-spans-one-register cases, but since Broadwell requires
1446        * evenly split writes regardless of source region, we assume that it was
1447        * an oversight and require it.
1448        */
1449       if (dst_regs == 2) {
1450          unsigned upper_reg_writes = 0, lower_reg_writes = 0;
1451 
1452          for (unsigned i = 0; i < exec_size; i++) {
1453             if (dst_access_mask[i] > 0xFFFFFFFF) {
1454                upper_reg_writes++;
1455             } else {
1456                assert(dst_access_mask[i] != 0);
1457                lower_reg_writes++;
1458             }
1459          }
1460 
1461          ERROR_IF(upper_reg_writes != lower_reg_writes,
1462                   "Writes must be evenly split between the two "
1463                   "destination registers");
1464       }
1465    }
1466 
1467    /* The IVB and HSW PRMs say:
1468     *
1469     *    When an instruction has a source region that spans two registers and
1470     *    the destination spans two registers, the destination elements must be
1471     *    evenly split between the two registers and each destination register
1472     *    must be entirely derived from one source register.
1473     *
1474     *    Note: In such cases, the regioning parameters must ensure that the
1475     *    offset from the two source registers is the same.
1476     *
1477     * The SNB PRM contains similar wording (but written in a much more
1478     * confusing manner).
1479     *
1480     * There are effectively three rules stated here:
1481     *
1482     *    For an instruction with a source and a destination spanning two
1483     *    registers,
1484     *
1485     *       (1) destination elements must be evenly split between the two
1486     *           registers
1487     *       (2) all destination elements in a register must be derived
1488     *           from one source register
1489     *       (3) the offset (i.e. the starting location in each of the two
1490     *           registers spanned by a region) must be the same in the two
1491     *           registers spanned by a region
1492     *
1493     * It is impossible to violate rule (1) without violating (2) or (3), so we
1494     * do not attempt to validate it.
1495     */
1496    if (devinfo->ver <= 7 && dst_regs == 2) {
1497       for (unsigned i = 0; i < num_sources; i++) {
1498 #define DO_SRC(n)                                                             \
1499          if (src ## n ## _regs <= 1)                                          \
1500             continue;                                                         \
1501                                                                               \
1502          for (unsigned i = 0; i < exec_size; i++) {                           \
1503             if ((dst_access_mask[i] > 0xFFFFFFFF) !=                          \
1504                 (src ## n ## _access_mask[i] > 0xFFFFFFFF)) {                 \
1505                ERROR("Each destination register must be entirely derived "    \
1506                      "from one source register");                             \
1507                break;                                                         \
1508             }                                                                 \
1509          }                                                                    \
1510                                                                               \
1511          unsigned offset_0 =                                                  \
1512             elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst);               \
1513          unsigned offset_1 = offset_0;                                        \
1514                                                                               \
1515          for (unsigned i = 0; i < exec_size; i++) {                           \
1516             if (src ## n ## _access_mask[i] > 0xFFFFFFFF) {                   \
1517                offset_1 = __builtin_ctzll(src ## n ## _access_mask[i]) - 32;  \
1518                break;                                                         \
1519             }                                                                 \
1520          }                                                                    \
1521                                                                               \
1522          ERROR_IF(num_sources == 2 && offset_0 != offset_1,                   \
1523                   "The offset from the two source registers "                 \
1524                   "must be the same")
1525 
1526          if (i == 0) {
1527             DO_SRC(0);
1528          } else {
1529             DO_SRC(1);
1530          }
1531 #undef DO_SRC
1532       }
1533    }
1534 
1535    /* The IVB and HSW PRMs say:
1536     *
1537     *    When destination spans two registers, the source MUST span two
1538     *    registers. The exception to the above rule:
1539     *        1. When source is scalar, the source registers are not
1540     *           incremented.
1541     *        2. When source is packed integer Word and destination is packed
1542     *           integer DWord, the source register is not incremented by the
1543     *           source sub register is incremented.
1544     *
1545     * The SNB PRM does not contain this rule, but the internal documentation
1546     * indicates that it applies to SNB as well. We assume that the rule applies
1547     * to Gen <= 5 although their PRMs do not state it.
1548     *
1549     * While the documentation explicitly says in exception (2) that the
1550     * destination must be an integer DWord, the hardware allows at least a
1551     * float destination type as well. We emit such instructions from
1552     *
1553     *    elk_fs_visitor::emit_interpolation_setup_gfx6
1554     *    elk_fs_visitor::emit_fragcoord_interpolation
1555     *
1556     * and have for years with no ill effects.
1557     *
1558     * Additionally the simulator source code indicates that the real condition
1559     * is that the size of the destination type is 4 bytes.
1560     *
1561     * HSW PRMs also add a note to the second exception:
1562     *  "When lower 8 channels are disabled, the sub register of source1
1563     *   operand is not incremented. If the lower 8 channels are expected
1564     *   to be disabled, say by predication, the instruction must be split
1565     *   into pair of simd8 operations."
1566     *
1567     * We can't reliably know if the channels won't be disabled due to,
1568     * for example, IMASK. So, play it safe and disallow packed-word exception
1569     * for src1.
1570     */
1571    if (devinfo->ver <= 7 && dst_regs == 2) {
1572       enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1573       bool dst_is_packed_dword =
1574          is_packed(exec_size * stride, exec_size, stride) &&
1575          elk_reg_type_to_size(dst_type) == 4;
1576 
1577       for (unsigned i = 0; i < num_sources; i++) {
1578 #define DO_SRC(n)                                                                  \
1579          unsigned vstride, width, hstride;                                         \
1580          vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst));           \
1581          width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst));                \
1582          hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst));           \
1583          bool src ## n ## _is_packed_word =                                        \
1584             n != 1 && is_packed(vstride, width, hstride) &&                        \
1585             (elk_inst_src ## n ## _type(devinfo, inst) == ELK_REGISTER_TYPE_W ||   \
1586              elk_inst_src ## n ## _type(devinfo, inst) == ELK_REGISTER_TYPE_UW);   \
1587                                                                                    \
1588          ERROR_IF(src ## n ## _regs == 1 &&                                        \
1589                   !src ## n ## _has_scalar_region(devinfo, inst) &&                \
1590                   !(dst_is_packed_dword && src ## n ## _is_packed_word),           \
1591                   "When the destination spans two registers, the source must "     \
1592                   "span two registers\n" ERROR_INDENT "(exceptions for scalar "    \
1593                   "sources, and packed-word to packed-dword expansion for src0)")
1594 
1595          if (i == 0) {
1596             DO_SRC(0);
1597          } else {
1598             DO_SRC(1);
1599          }
1600 #undef DO_SRC
1601       }
1602    }
1603 
1604    return error_msg;
1605 }
1606 
1607 static struct string
vector_immediate_restrictions(const struct elk_isa_info * isa,const elk_inst * inst)1608 vector_immediate_restrictions(const struct elk_isa_info *isa,
1609                               const elk_inst *inst)
1610 {
1611    const struct intel_device_info *devinfo = isa->devinfo;
1612 
1613    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1614    struct string error_msg = { .str = NULL, .len = 0 };
1615 
1616    if (num_sources == 3 || num_sources == 0)
1617       return (struct string){};
1618 
1619    unsigned file = num_sources == 1 ?
1620                    elk_inst_src0_reg_file(devinfo, inst) :
1621                    elk_inst_src1_reg_file(devinfo, inst);
1622    if (file != ELK_IMMEDIATE_VALUE)
1623       return (struct string){};
1624 
1625    enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1626    unsigned dst_type_size = elk_reg_type_to_size(dst_type);
1627    unsigned dst_subreg = elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1 ?
1628                          elk_inst_dst_da1_subreg_nr(devinfo, inst) : 0;
1629    unsigned dst_stride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1630    enum elk_reg_type type = num_sources == 1 ?
1631                             elk_inst_src0_type(devinfo, inst) :
1632                             elk_inst_src1_type(devinfo, inst);
1633 
1634    /* The PRMs say:
1635     *
1636     *    When an immediate vector is used in an instruction, the destination
1637     *    must be 128-bit aligned with destination horizontal stride equivalent
1638     *    to a word for an immediate integer vector (v) and equivalent to a
1639     *    DWord for an immediate float vector (vf).
1640     *
1641     * The text has not been updated for the addition of the immediate unsigned
1642     * integer vector type (uv) on SNB, but presumably the same restriction
1643     * applies.
1644     */
1645    switch (type) {
1646    case ELK_REGISTER_TYPE_V:
1647    case ELK_REGISTER_TYPE_UV:
1648    case ELK_REGISTER_TYPE_VF:
1649       ERROR_IF(dst_subreg % (128 / 8) != 0,
1650                "Destination must be 128-bit aligned in order to use immediate "
1651                "vector types");
1652 
1653       if (type == ELK_REGISTER_TYPE_VF) {
1654          ERROR_IF(dst_type_size * dst_stride != 4,
1655                   "Destination must have stride equivalent to dword in order "
1656                   "to use the VF type");
1657       } else {
1658          ERROR_IF(dst_type_size * dst_stride != 2,
1659                   "Destination must have stride equivalent to word in order "
1660                   "to use the V or UV type");
1661       }
1662       break;
1663    default:
1664       break;
1665    }
1666 
1667    return error_msg;
1668 }
1669 
1670 static struct string
special_requirements_for_handling_double_precision_data_types(const struct elk_isa_info * isa,const elk_inst * inst)1671 special_requirements_for_handling_double_precision_data_types(
1672                                        const struct elk_isa_info *isa,
1673                                        const elk_inst *inst)
1674 {
1675    const struct intel_device_info *devinfo = isa->devinfo;
1676 
1677    unsigned num_sources = elk_num_sources_from_inst(isa, inst);
1678    struct string error_msg = { .str = NULL, .len = 0 };
1679 
1680    if (num_sources == 3 || num_sources == 0)
1681       return (struct string){};
1682 
1683    enum elk_reg_type exec_type = execution_type(isa, inst);
1684    unsigned exec_type_size = elk_reg_type_to_size(exec_type);
1685 
1686    enum elk_reg_file dst_file = elk_inst_dst_reg_file(devinfo, inst);
1687    enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1688    unsigned dst_type_size = elk_reg_type_to_size(dst_type);
1689    unsigned dst_hstride = STRIDE(elk_inst_dst_hstride(devinfo, inst));
1690    unsigned dst_reg = elk_inst_dst_da_reg_nr(devinfo, inst);
1691    unsigned dst_subreg = elk_inst_dst_da1_subreg_nr(devinfo, inst);
1692    unsigned dst_address_mode = elk_inst_dst_address_mode(devinfo, inst);
1693 
1694    bool is_integer_dword_multiply =
1695       devinfo->ver >= 8 &&
1696       elk_inst_opcode(isa, inst) == ELK_OPCODE_MUL &&
1697       (elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_D ||
1698        elk_inst_src0_type(devinfo, inst) == ELK_REGISTER_TYPE_UD) &&
1699       (elk_inst_src1_type(devinfo, inst) == ELK_REGISTER_TYPE_D ||
1700        elk_inst_src1_type(devinfo, inst) == ELK_REGISTER_TYPE_UD);
1701 
1702    const bool is_double_precision =
1703       dst_type_size == 8 || exec_type_size == 8 || is_integer_dword_multiply;
1704 
1705    for (unsigned i = 0; i < num_sources; i++) {
1706       unsigned vstride, width, hstride, type_size, reg, subreg, address_mode;
1707       bool is_scalar_region;
1708       enum elk_reg_file file;
1709       enum elk_reg_type type;
1710 
1711 #define DO_SRC(n)                                                              \
1712       if (elk_inst_src ## n ## _reg_file(devinfo, inst) ==                     \
1713           ELK_IMMEDIATE_VALUE)                                                 \
1714          continue;                                                             \
1715                                                                                \
1716       is_scalar_region = src ## n ## _has_scalar_region(devinfo, inst);        \
1717       vstride = STRIDE(elk_inst_src ## n ## _vstride(devinfo, inst));          \
1718       width = WIDTH(elk_inst_src ## n ## _width(devinfo, inst));               \
1719       hstride = STRIDE(elk_inst_src ## n ## _hstride(devinfo, inst));          \
1720       file = elk_inst_src ## n ## _reg_file(devinfo, inst);                    \
1721       type = elk_inst_src ## n ## _type(devinfo, inst);                        \
1722       type_size = elk_reg_type_to_size(type);                                  \
1723       reg = elk_inst_src ## n ## _da_reg_nr(devinfo, inst);                    \
1724       subreg = elk_inst_src ## n ## _da1_subreg_nr(devinfo, inst);             \
1725       address_mode = elk_inst_src ## n ## _address_mode(devinfo, inst)
1726 
1727       if (i == 0) {
1728          DO_SRC(0);
1729       } else {
1730          DO_SRC(1);
1731       }
1732 #undef DO_SRC
1733 
1734       const unsigned src_stride = (hstride ? hstride : vstride) * type_size;
1735       const unsigned dst_stride = dst_hstride * dst_type_size;
1736 
1737       /* The PRMs say that for CHV, BXT:
1738        *
1739        *    When source or destination datatype is 64b or operation is integer
1740        *    DWord multiply, regioning in Align1 must follow these rules:
1741        *
1742        *    1. Source and Destination horizontal stride must be aligned to the
1743        *       same qword.
1744        *    2. Regioning must ensure Src.Vstride = Src.Width * Src.Hstride.
1745        *    3. Source and Destination offset must be the same, except the case
1746        *       of scalar source.
1747        */
1748       if (is_double_precision &&
1749           elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1 &&
1750           devinfo->platform == INTEL_PLATFORM_CHV) {
1751          ERROR_IF(!is_scalar_region &&
1752                   (src_stride % 8 != 0 ||
1753                    dst_stride % 8 != 0 ||
1754                    src_stride != dst_stride),
1755                   "Source and destination horizontal stride must equal and a "
1756                   "multiple of a qword when the execution type is 64-bit");
1757 
1758          ERROR_IF(vstride != width * hstride,
1759                   "Vstride must be Width * Hstride when the execution type is "
1760                   "64-bit");
1761 
1762          ERROR_IF(!is_scalar_region && dst_subreg != subreg,
1763                   "Source and destination offset must be the same when the "
1764                   "execution type is 64-bit");
1765       }
1766 
1767       /* The PRMs say that for CHV, BXT:
1768        *
1769        *    When source or destination datatype is 64b or operation is integer
1770        *    DWord multiply, indirect addressing must not be used.
1771        */
1772       if (is_double_precision && devinfo->platform == INTEL_PLATFORM_CHV) {
1773          ERROR_IF(ELK_ADDRESS_REGISTER_INDIRECT_REGISTER == address_mode ||
1774                   ELK_ADDRESS_REGISTER_INDIRECT_REGISTER == dst_address_mode,
1775                   "Indirect addressing is not allowed when the execution type "
1776                   "is 64-bit");
1777       }
1778 
1779       /* The PRMs say that for CHV, BXT:
1780        *
1781        *    ARF registers must never be used with 64b datatype or when
1782        *    operation is integer DWord multiply.
1783        *
1784        * We assume that the restriction does not apply to the null register.
1785        */
1786       if (is_double_precision && devinfo->platform == INTEL_PLATFORM_CHV) {
1787          ERROR_IF(elk_inst_opcode(isa, inst) == ELK_OPCODE_MAC ||
1788                   elk_inst_acc_wr_control(devinfo, inst) ||
1789                   (ELK_ARCHITECTURE_REGISTER_FILE == file &&
1790                    reg != ELK_ARF_NULL) ||
1791                   (ELK_ARCHITECTURE_REGISTER_FILE == dst_file &&
1792                    dst_reg != ELK_ARF_NULL),
1793                   "Architecture registers cannot be used when the execution "
1794                   "type is 64-bit");
1795       }
1796    }
1797 
1798    /* The PRMs say that for BDW, SKL:
1799     *
1800     *    If Align16 is required for an operation with QW destination and non-QW
1801     *    source datatypes, the execution size cannot exceed 2.
1802     *
1803     * We assume that the restriction applies to all Gfx8+ parts.
1804     */
1805    if (is_double_precision && devinfo->ver >= 8) {
1806       enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
1807       enum elk_reg_type src1_type =
1808          num_sources > 1 ? elk_inst_src1_type(devinfo, inst) : src0_type;
1809       unsigned src0_type_size = elk_reg_type_to_size(src0_type);
1810       unsigned src1_type_size = elk_reg_type_to_size(src1_type);
1811 
1812       ERROR_IF(elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_16 &&
1813                dst_type_size == 8 &&
1814                (src0_type_size != 8 || src1_type_size != 8) &&
1815                elk_inst_exec_size(devinfo, inst) > ELK_EXECUTE_2,
1816                "In Align16 exec size cannot exceed 2 with a QWord destination "
1817                "and a non-QWord source");
1818    }
1819 
1820    /* The PRMs say that for CHV, BXT:
1821     *
1822     *    When source or destination datatype is 64b or operation is integer
1823     *    DWord multiply, DepCtrl must not be used.
1824     */
1825    if (is_double_precision && devinfo->platform == INTEL_PLATFORM_CHV) {
1826       ERROR_IF(elk_inst_no_dd_check(devinfo, inst) ||
1827                elk_inst_no_dd_clear(devinfo, inst),
1828                "DepCtrl is not allowed when the execution type is 64-bit");
1829    }
1830 
1831    return error_msg;
1832 }
1833 
1834 static struct string
instruction_restrictions(const struct elk_isa_info * isa,const elk_inst * inst)1835 instruction_restrictions(const struct elk_isa_info *isa,
1836                          const elk_inst *inst)
1837 {
1838    const struct intel_device_info *devinfo = isa->devinfo;
1839    struct string error_msg = { .str = NULL, .len = 0 };
1840 
1841    if (elk_inst_opcode(isa, inst) == ELK_OPCODE_CMP ||
1842        elk_inst_opcode(isa, inst) == ELK_OPCODE_CMPN) {
1843       if (devinfo->ver <= 7) {
1844          /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit
1845           * ISA) says:
1846           *
1847           *    Accumulator cannot be destination, implicit or explicit. The
1848           *    destination must be a general register or the null register.
1849           *
1850           * Page 77 of the Haswell PRM Volume 2b contains the same text.  The
1851           * 965G PRMs contain similar text.
1852           *
1853           * Page 864 (page 880 of the PDF) of the Broadwell PRM Volume 7 says:
1854           *
1855           *    For the cmp and cmpn instructions, remove the accumulator
1856           *    restrictions.
1857           */
1858          ERROR_IF(elk_inst_dst_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
1859                   elk_inst_dst_da_reg_nr(devinfo, inst) != ELK_ARF_NULL,
1860                   "Accumulator cannot be destination, implicit or explicit.");
1861       }
1862 
1863       /* Page 166 of the Ivy Bridge PRM Volume 4 part 3 (Execution Unit ISA)
1864        * says:
1865        *
1866        *    If the destination is the null register, the {Switch} instruction
1867        *    option must be used.
1868        *
1869        * Page 77 of the Haswell PRM Volume 2b contains the same text.
1870        */
1871       if (devinfo->ver == 7) {
1872          ERROR_IF(dst_is_null(devinfo, inst) &&
1873                   elk_inst_thread_control(devinfo, inst) != ELK_THREAD_SWITCH,
1874                   "If the destination is the null register, the {Switch} "
1875                   "instruction option must be used.");
1876       }
1877 
1878       ERROR_IF(elk_inst_cond_modifier(devinfo, inst) == ELK_CONDITIONAL_NONE,
1879                "CMP (or CMPN) must have a condition.");
1880    }
1881 
1882    if (elk_inst_opcode(isa, inst) == ELK_OPCODE_SEL) {
1883       if (devinfo->ver < 6) {
1884          ERROR_IF(elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE,
1885                   "SEL must not have a condition modifier");
1886          ERROR_IF(elk_inst_pred_control(devinfo, inst) == ELK_PREDICATE_NONE,
1887                   "SEL must be predicated");
1888       } else {
1889          ERROR_IF((elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE) ==
1890                   (elk_inst_pred_control(devinfo, inst) != ELK_PREDICATE_NONE),
1891                   "SEL must either be predicated or have a condition modifiers");
1892       }
1893    }
1894 
1895    if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MUL) {
1896       const enum elk_reg_type src0_type = elk_inst_src0_type(devinfo, inst);
1897       const enum elk_reg_type src1_type = elk_inst_src1_type(devinfo, inst);
1898       const enum elk_reg_type dst_type = elk_inst_dst_type(devinfo, inst);
1899 
1900       if (devinfo->ver == 6) {
1901          /* Page 223 of the Sandybridge PRM volume 4 part 2 says:
1902           *
1903           *    [DevSNB]: When multiple (sic) a DW and a W, the W has to be on
1904           *    src0, and the DW has to be on src1.
1905           *
1906           * This text appears only in the Sandybridge PRM.
1907           */
1908          ERROR_IF(elk_reg_type_is_integer(src0_type) &&
1909                   type_sz(src0_type) == 4 && type_sz(src1_type) < 4,
1910                   "When multiplying a DW and any lower precision integer, the "
1911                   "DW operand must be src1.");
1912       } else if (devinfo->ver >= 7) {
1913          /* Page 966 (page 982 of the PDF) of Broadwell PRM volume 2a says:
1914           *
1915           *    When multiplying a DW and any lower precision integer, the DW
1916           *    operand must on src0.
1917           *
1918           * Ivy Bridge, Haswell, Skylake, and Ice Lake PRMs contain the same
1919           * text.
1920           */
1921          ERROR_IF(elk_reg_type_is_integer(src1_type) &&
1922                   type_sz(src0_type) < 4 && type_sz(src1_type) == 4,
1923                   "When multiplying a DW and any lower precision integer, the "
1924                   "DW operand must be src0.");
1925       }
1926 
1927       if (devinfo->ver <= 7) {
1928          /* Section 14.2.28 of Intel 965 Express Chipset PRM volume 4 says:
1929           *
1930           *    Source operands cannot be an accumulator register.
1931           *
1932           * Iron Lake, Sandybridge, and Ivy Bridge PRMs have the same text.
1933           * Haswell does not.  Given that later PRMs have different
1934           * restrictions on accumulator sources (see below), it seems most
1935           * likely that Haswell shares the Ivy Bridge restriction.
1936           */
1937          ERROR_IF(src0_is_acc(devinfo, inst) || src1_is_acc(devinfo, inst),
1938                   "Source operands cannot be an accumulator register.");
1939       } else {
1940          /* Page 971 (page 987 of the PDF), section "Accumulator
1941           * Restrictions," of the Broadwell PRM volume 7 says:
1942           *
1943           *    Integer source operands cannot be accumulators.
1944           *
1945           * The Skylake and Ice Lake PRMs contain the same text.
1946           */
1947          ERROR_IF((src0_is_acc(devinfo, inst) &&
1948                    elk_reg_type_is_integer(src0_type)) ||
1949                   (src1_is_acc(devinfo, inst) &&
1950                    elk_reg_type_is_integer(src1_type)),
1951                   "Integer source operands cannot be accumulators.");
1952       }
1953 
1954       if (devinfo->ver <= 6) {
1955          /* Page 223 of the Sandybridge PRM volume 4 part 2 says:
1956           *
1957           *    Dword integer source is not allowed for this instruction in
1958           *    float execution mode.  In other words, if one source is of type
1959           *    float (:f, :vf), the other source cannot be of type dword
1960           *    integer (:ud or :d).
1961           *
1962           * G965 and Iron Lake PRMs have similar text.  Later GPUs do not
1963           * allow mixed source types at all, but that restriction should be
1964           * handled elsewhere.
1965           */
1966          ERROR_IF(execution_type(isa, inst) == ELK_REGISTER_TYPE_F &&
1967                   (src0_type == ELK_REGISTER_TYPE_UD ||
1968                    src0_type == ELK_REGISTER_TYPE_D ||
1969                    src1_type == ELK_REGISTER_TYPE_UD ||
1970                    src1_type == ELK_REGISTER_TYPE_D),
1971                   "Dword integer source is not allowed for this instruction in"
1972                   "float execution mode.");
1973       }
1974 
1975       if (devinfo->ver <= 7) {
1976          /* Page 118 of the Haswell PRM volume 2b says:
1977           *
1978           *    When operating on integers with at least one of the source
1979           *    being a DWord type (signed or unsigned), the destination cannot
1980           *    be floating-point (implementation note: the data converter only
1981           *    looks at the low 34 bits of the result).
1982           *
1983           * G965, Iron Lake, Sandybridge, and Ivy Bridge have similar text.
1984           * Later GPUs do not allow mixed source and destination types at all,
1985           * but that restriction should be handled elsewhere.
1986           */
1987          ERROR_IF(dst_type == ELK_REGISTER_TYPE_F &&
1988                   (src0_type == ELK_REGISTER_TYPE_UD ||
1989                    src0_type == ELK_REGISTER_TYPE_D ||
1990                    src1_type == ELK_REGISTER_TYPE_UD ||
1991                    src1_type == ELK_REGISTER_TYPE_D),
1992                   "Float destination type not allowed with DWord source type.");
1993       }
1994 
1995       if (devinfo->ver == 8) {
1996          /* Page 966 (page 982 of the PDF) of the Broadwell PRM volume 2a
1997           * says:
1998           *
1999           *    When multiplying DW x DW, the dst cannot be accumulator.
2000           *
2001           * This text also appears in the Cherry Trail / Braswell PRM, but it
2002           * does not appear in any other PRM.
2003           */
2004          ERROR_IF((src0_type == ELK_REGISTER_TYPE_UD ||
2005                    src0_type == ELK_REGISTER_TYPE_D) &&
2006                   (src1_type == ELK_REGISTER_TYPE_UD ||
2007                    src1_type == ELK_REGISTER_TYPE_D) &&
2008                   elk_inst_dst_reg_file(devinfo, inst) == ELK_ARCHITECTURE_REGISTER_FILE &&
2009                   elk_inst_dst_da_reg_nr(devinfo, inst) != ELK_ARF_NULL,
2010                   "When multiplying DW x DW, the dst cannot be accumulator.");
2011       }
2012 
2013       /* Page 935 (page 951 of the PDF) of the Ice Lake PRM volume 2a says:
2014        *
2015        *    When multiplying integer data types, if one of the sources is a
2016        *    DW, the resulting full precision data is stored in the
2017        *    accumulator. However, if the destination data type is either W or
2018        *    DW, the low bits of the result are written to the destination
2019        *    register and the remaining high bits are discarded. This results
2020        *    in undefined Overflow and Sign flags. Therefore, conditional
2021        *    modifiers and saturation (.sat) cannot be used in this case.
2022        *
2023        * Similar text appears in every version of the PRM.
2024        *
2025        * The wording of the last sentence is not very clear.  It could either
2026        * be interpreted as "conditional modifiers combined with saturation
2027        * cannot be used" or "neither conditional modifiers nor saturation can
2028        * be used."  I have interpreted it as the latter primarily because that
2029        * is the more restrictive interpretation.
2030        */
2031       ERROR_IF((src0_type == ELK_REGISTER_TYPE_UD ||
2032                 src0_type == ELK_REGISTER_TYPE_D ||
2033                 src1_type == ELK_REGISTER_TYPE_UD ||
2034                 src1_type == ELK_REGISTER_TYPE_D) &&
2035                (dst_type == ELK_REGISTER_TYPE_UD ||
2036                 dst_type == ELK_REGISTER_TYPE_D ||
2037                 dst_type == ELK_REGISTER_TYPE_UW ||
2038                 dst_type == ELK_REGISTER_TYPE_W) &&
2039                (elk_inst_saturate(devinfo, inst) != 0 ||
2040                 elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE),
2041                "Neither Saturate nor conditional modifier allowed with DW "
2042                "integer multiply.");
2043    }
2044 
2045    if (elk_inst_opcode(isa, inst) == ELK_OPCODE_MATH) {
2046       unsigned math_function = elk_inst_math_function(devinfo, inst);
2047       switch (math_function) {
2048       case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER:
2049       case ELK_MATH_FUNCTION_INT_DIV_QUOTIENT:
2050       case ELK_MATH_FUNCTION_INT_DIV_REMAINDER: {
2051          /* Page 442 of the Broadwell PRM Volume 2a "Extended Math Function" says:
2052           *    INT DIV function does not support source modifiers.
2053           * Bspec 6647 extends it back to Ivy Bridge.
2054           */
2055          bool src0_valid = !elk_inst_src0_negate(devinfo, inst) &&
2056                            !elk_inst_src0_abs(devinfo, inst);
2057          bool src1_valid = !elk_inst_src1_negate(devinfo, inst) &&
2058                            !elk_inst_src1_abs(devinfo, inst);
2059          ERROR_IF(!src0_valid || !src1_valid,
2060                   "INT DIV function does not support source modifiers.");
2061          break;
2062       }
2063       default:
2064          break;
2065       }
2066    }
2067 
2068    if (elk_inst_opcode(isa, inst) == ELK_OPCODE_OR ||
2069        elk_inst_opcode(isa, inst) == ELK_OPCODE_AND ||
2070        elk_inst_opcode(isa, inst) == ELK_OPCODE_XOR ||
2071        elk_inst_opcode(isa, inst) == ELK_OPCODE_NOT) {
2072       if (devinfo->ver >= 8) {
2073          /* While the behavior of the negate source modifier is defined as
2074           * logical not, the behavior of abs source modifier is not
2075           * defined. Disallow it to be safe.
2076           */
2077          ERROR_IF(elk_inst_src0_abs(devinfo, inst),
2078                   "Behavior of abs source modifier in logic ops is undefined.");
2079          ERROR_IF(elk_inst_opcode(isa, inst) != ELK_OPCODE_NOT &&
2080                   elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE &&
2081                   elk_inst_src1_abs(devinfo, inst),
2082                   "Behavior of abs source modifier in logic ops is undefined.");
2083 
2084          /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
2085           *
2086           *    Source modifier is not allowed if source is an accumulator.
2087           *
2088           * The same text also appears for OR, NOT, and XOR instructions.
2089           */
2090          ERROR_IF((elk_inst_src0_abs(devinfo, inst) ||
2091                    elk_inst_src0_negate(devinfo, inst)) &&
2092                   src0_is_acc(devinfo, inst),
2093                   "Source modifier is not allowed if source is an accumulator.");
2094          ERROR_IF(elk_num_sources_from_inst(isa, inst) > 1 &&
2095                   (elk_inst_src1_abs(devinfo, inst) ||
2096                    elk_inst_src1_negate(devinfo, inst)) &&
2097                   src1_is_acc(devinfo, inst),
2098                   "Source modifier is not allowed if source is an accumulator.");
2099       }
2100 
2101       /* Page 479 (page 495 of the PDF) of the Broadwell PRM volume 2a says:
2102        *
2103        *    This operation does not produce sign or overflow conditions. Only
2104        *    the .e/.z or .ne/.nz conditional modifiers should be used.
2105        *
2106        * The same text also appears for OR, NOT, and XOR instructions.
2107        *
2108        * Per the comment around nir_op_imod in elk_fs_nir.cpp, we have
2109        * determined this to not be true. The only conditions that seem
2110        * absolutely sketchy are O, R, and U.  Some OpenGL shaders from Doom
2111        * 2016 have been observed to generate and.g and operate correctly.
2112        */
2113       const enum elk_conditional_mod cmod =
2114          elk_inst_cond_modifier(devinfo, inst);
2115       ERROR_IF(cmod == ELK_CONDITIONAL_O ||
2116                cmod == ELK_CONDITIONAL_R ||
2117                cmod == ELK_CONDITIONAL_U,
2118                "O, R, and U conditional modifiers should not be used.");
2119    }
2120 
2121    if (elk_inst_opcode(isa, inst) == ELK_OPCODE_BFI2) {
2122       ERROR_IF(elk_inst_cond_modifier(devinfo, inst) != ELK_CONDITIONAL_NONE,
2123                "BFI2 cannot have conditional modifier");
2124 
2125       ERROR_IF(elk_inst_saturate(devinfo, inst),
2126                "BFI2 cannot have saturate modifier");
2127 
2128       ERROR_IF(elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1,
2129                "BFI2 cannot have Align1");
2130 
2131       enum elk_reg_type dst_type = elk_inst_3src_a16_dst_type(devinfo, inst);
2132 
2133       ERROR_IF(dst_type != ELK_REGISTER_TYPE_D &&
2134                dst_type != ELK_REGISTER_TYPE_UD,
2135                "BFI2 destination type must be D or UD");
2136 
2137       for (unsigned s = 0; s < 3; s++) {
2138          enum elk_reg_type src_type = elk_inst_3src_a16_src_type(devinfo, inst);
2139 
2140          ERROR_IF(src_type != dst_type,
2141                   "BFI2 source type must match destination type");
2142       }
2143    }
2144 
2145    if (elk_inst_opcode(isa, inst) == ELK_OPCODE_CSEL) {
2146       ERROR_IF(elk_inst_pred_control(devinfo, inst) != ELK_PREDICATE_NONE,
2147                "CSEL cannot be predicated");
2148 
2149       /* CSEL is CMP and SEL fused into one. The condition modifier, which
2150        * does not actually modify the flags, controls the built-in comparison.
2151        */
2152       ERROR_IF(elk_inst_cond_modifier(devinfo, inst) == ELK_CONDITIONAL_NONE,
2153                "CSEL must have a condition.");
2154 
2155       ERROR_IF(elk_inst_access_mode(devinfo, inst) == ELK_ALIGN_1,
2156                "CSEL cannot have Align1.");
2157       enum elk_reg_type dst_type = elk_inst_3src_a16_dst_type(devinfo, inst);
2158 
2159       if (devinfo->ver < 8) {
2160          ERROR_IF(devinfo->ver < 8, "CSEL not supported before Gfx8");
2161       } else {
2162          ERROR_IF(dst_type != ELK_REGISTER_TYPE_F &&
2163                   dst_type != ELK_REGISTER_TYPE_HF &&
2164                   dst_type != ELK_REGISTER_TYPE_D &&
2165                   dst_type != ELK_REGISTER_TYPE_W,
2166                   "CSEL destination type must be F, HF, D, or W");
2167       }
2168 
2169       for (unsigned s = 0; s < 3; s++) {
2170          enum elk_reg_type src_type = elk_inst_3src_a16_src_type(devinfo, inst);
2171 
2172          ERROR_IF(src_type != dst_type,
2173                   "CSEL source type must match destination type");
2174       }
2175    }
2176 
2177    return error_msg;
2178 }
2179 
2180 static struct string
send_descriptor_restrictions(const struct elk_isa_info * isa,const elk_inst * inst)2181 send_descriptor_restrictions(const struct elk_isa_info *isa,
2182                              const elk_inst *inst)
2183 {
2184    const struct intel_device_info *devinfo = isa->devinfo;
2185    struct string error_msg = { .str = NULL, .len = 0 };
2186 
2187    if (inst_is_send(isa, inst)) {
2188       /* We can only validate immediate descriptors */
2189       if (elk_inst_src1_reg_file(devinfo, inst) != ELK_IMMEDIATE_VALUE)
2190          return error_msg;
2191    } else {
2192       return error_msg;
2193    }
2194 
2195    if (elk_inst_sfid(devinfo, inst) == ELK_SFID_URB) {
2196       /* Gfx4 doesn't have a "header present" bit in the SEND message. */
2197       ERROR_IF(devinfo->ver > 4 && !elk_inst_header_present(devinfo, inst),
2198                "Header must be present for all URB messages.");
2199 
2200       switch (elk_inst_urb_opcode(devinfo, inst)) {
2201       case ELK_URB_OPCODE_WRITE_HWORD:
2202          break;
2203 
2204       /* case FF_SYNC: */
2205       case ELK_URB_OPCODE_WRITE_OWORD:
2206          /* Gfx5 / Gfx6 FF_SYNC message and Gfx7+ URB_WRITE_OWORD have the
2207           * same opcode value.
2208           */
2209          if (devinfo->ver == 5 || devinfo->ver == 6) {
2210             ERROR_IF(elk_inst_urb_global_offset(devinfo, inst) != 0,
2211                      "FF_SYNC global offset must be zero.");
2212             ERROR_IF(elk_inst_urb_swizzle_control(devinfo, inst) != 0,
2213                      "FF_SYNC swizzle control must be zero.");
2214             ERROR_IF(elk_inst_urb_used(devinfo, inst) != 0,
2215                      "FF_SYNC used must be zero.");
2216             ERROR_IF(elk_inst_urb_complete(devinfo, inst) != 0,
2217                      "FF_SYNC complete must be zero.");
2218 
2219             /* Volume 4 part 2 of the Sandybridge PRM (page 28) says:
2220              *
2221              *    A message response (writeback) length of 1 GRF will be
2222              *    indicated on the ‘send’ instruction if the thread requires
2223              *    response data and/or synchronization.
2224              */
2225             ERROR_IF((unsigned)elk_inst_rlen(devinfo, inst) > 1,
2226                      "FF_SYNC read length must be 0 or 1.");
2227          } else {
2228             ERROR_IF(devinfo->ver < 7,
2229                      "URB OWORD write messages only valid on gfx >= 7");
2230          }
2231          break;
2232 
2233       case ELK_URB_OPCODE_READ_HWORD:
2234       case ELK_URB_OPCODE_READ_OWORD:
2235          ERROR_IF(devinfo->ver < 7,
2236                   "URB read messages only valid on gfx >= 7");
2237          break;
2238 
2239       case GFX7_URB_OPCODE_ATOMIC_MOV:
2240       case GFX7_URB_OPCODE_ATOMIC_INC:
2241          ERROR_IF(devinfo->ver < 7,
2242                   "URB atomic move and increment messages only valid on gfx >= 7");
2243          break;
2244 
2245       case GFX8_URB_OPCODE_ATOMIC_ADD:
2246          /* The Haswell PRM lists this opcode as valid on page 317. */
2247          ERROR_IF(devinfo->verx10 < 75,
2248                   "URB atomic add message only valid on gfx >= 7.5");
2249          break;
2250 
2251       case GFX8_URB_OPCODE_SIMD8_READ:
2252          ERROR_IF(elk_inst_rlen(devinfo, inst) == 0,
2253                   "URB SIMD8 read message must read some data.");
2254          FALLTHROUGH;
2255 
2256       case GFX8_URB_OPCODE_SIMD8_WRITE:
2257          ERROR_IF(devinfo->ver < 8,
2258                   "URB SIMD8 messages only valid on gfx >= 8");
2259          break;
2260 
2261       default:
2262          ERROR_IF(true, "Invalid URB message");
2263          break;
2264       }
2265    }
2266 
2267    return error_msg;
2268 }
2269 
2270 bool
elk_validate_instruction(const struct elk_isa_info * isa,const elk_inst * inst,int offset,unsigned inst_size,struct elk_disasm_info * disasm)2271 elk_validate_instruction(const struct elk_isa_info *isa,
2272                          const elk_inst *inst, int offset,
2273                          unsigned inst_size,
2274                          struct elk_disasm_info *disasm)
2275 {
2276    struct string error_msg = { .str = NULL, .len = 0 };
2277 
2278    if (is_unsupported_inst(isa, inst)) {
2279       ERROR("Instruction not supported on this Gen");
2280    } else {
2281       CHECK(invalid_values);
2282 
2283       if (error_msg.str == NULL) {
2284          CHECK(sources_not_null);
2285          CHECK(send_restrictions);
2286          CHECK(general_restrictions_based_on_operand_types);
2287          CHECK(general_restrictions_on_region_parameters);
2288          CHECK(special_restrictions_for_mixed_float_mode);
2289          CHECK(region_alignment_rules);
2290          CHECK(vector_immediate_restrictions);
2291          CHECK(special_requirements_for_handling_double_precision_data_types);
2292          CHECK(instruction_restrictions);
2293          CHECK(send_descriptor_restrictions);
2294       }
2295    }
2296 
2297    if (error_msg.str && disasm) {
2298       elk_disasm_insert_error(disasm, offset, inst_size, error_msg.str);
2299    }
2300    free(error_msg.str);
2301 
2302    return error_msg.len == 0;
2303 }
2304 
2305 bool
elk_validate_instructions(const struct elk_isa_info * isa,const void * assembly,int start_offset,int end_offset,struct elk_disasm_info * disasm)2306 elk_validate_instructions(const struct elk_isa_info *isa,
2307                           const void *assembly, int start_offset, int end_offset,
2308                           struct elk_disasm_info *disasm)
2309 {
2310    const struct intel_device_info *devinfo = isa->devinfo;
2311    bool valid = true;
2312 
2313    for (int src_offset = start_offset; src_offset < end_offset;) {
2314       const elk_inst *inst = assembly + src_offset;
2315       bool is_compact = elk_inst_cmpt_control(devinfo, inst);
2316       unsigned inst_size = is_compact ? sizeof(elk_compact_inst)
2317                                       : sizeof(elk_inst);
2318       elk_inst uncompacted;
2319 
2320       if (is_compact) {
2321          elk_compact_inst *compacted = (void *)inst;
2322          elk_uncompact_instruction(isa, &uncompacted, compacted);
2323          inst = &uncompacted;
2324       }
2325 
2326       bool v = elk_validate_instruction(isa, inst, src_offset,
2327                                         inst_size, disasm);
2328       valid = valid && v;
2329 
2330       src_offset += inst_size;
2331    }
2332 
2333    return valid;
2334 }
2335