xref: /aosp_15_r20/external/mesa3d/src/amd/compiler/aco_validate.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2018 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "aco_ir.h"
8 
9 #include "util/memstream.h"
10 #include "util/ralloc.h"
11 
12 #include <array>
13 #include <map>
14 #include <set>
15 #include <vector>
16 
17 namespace aco {
18 
19 static void
aco_log(Program * program,enum aco_compiler_debug_level level,const char * prefix,const char * file,unsigned line,const char * fmt,va_list args)20 aco_log(Program* program, enum aco_compiler_debug_level level, const char* prefix, const char* file,
21         unsigned line, const char* fmt, va_list args)
22 {
23    char* msg;
24 
25    if (program->debug.shorten_messages) {
26       msg = ralloc_vasprintf(NULL, fmt, args);
27    } else {
28       msg = ralloc_strdup(NULL, prefix);
29       ralloc_asprintf_append(&msg, "    In file %s:%u\n", file, line);
30       ralloc_asprintf_append(&msg, "    ");
31       ralloc_vasprintf_append(&msg, fmt, args);
32    }
33 
34    if (program->debug.func)
35       program->debug.func(program->debug.private_data, level, msg);
36 
37    fprintf(program->debug.output, "%s\n", msg);
38 
39    ralloc_free(msg);
40 }
41 
42 void
_aco_err(Program * program,const char * file,unsigned line,const char * fmt,...)43 _aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)
44 {
45    va_list args;
46 
47    va_start(args, fmt);
48    aco_log(program, ACO_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);
49    va_end(args);
50 }
51 
52 bool
validate_ir(Program * program)53 validate_ir(Program* program)
54 {
55    bool is_valid = true;
56    auto check = [&program, &is_valid](bool success, const char* msg,
57                                       aco::Instruction* instr) -> void
58    {
59       if (!success) {
60          char* out;
61          size_t outsize;
62          struct u_memstream mem;
63          u_memstream_open(&mem, &out, &outsize);
64          FILE* const memf = u_memstream_get(&mem);
65 
66          fprintf(memf, "%s: ", msg);
67          aco_print_instr(program->gfx_level, instr, memf);
68          u_memstream_close(&mem);
69 
70          aco_err(program, "%s", out);
71          free(out);
72 
73          is_valid = false;
74       }
75    };
76 
77    /* check reachability */
78    if (program->progress < CompilationProgress::after_lower_to_hw) {
79       std::map<uint32_t, std::pair<uint32_t, bool>> def_blocks;
80       for (Block& block : program->blocks) {
81          for (aco_ptr<Instruction>& instr : block.instructions) {
82             for (Definition def : instr->definitions) {
83                if (!def.isTemp())
84                   continue;
85                check(!def_blocks.count(def.tempId()), "Temporary defined twice", instr.get());
86                def_blocks[def.tempId()] = std::make_pair(block.index, false);
87             }
88          }
89       }
90 
91       for (Block& block : program->blocks) {
92          for (aco_ptr<Instruction>& instr : block.instructions) {
93             for (unsigned i = 0; i < instr->operands.size(); i++) {
94                Operand op = instr->operands[i];
95                if (!op.isTemp())
96                   continue;
97 
98                uint32_t use_block_idx = block.index;
99                if (instr->opcode == aco_opcode::p_phi || instr->opcode == aco_opcode::p_boolean_phi)
100                   use_block_idx = block.logical_preds[i];
101                else if (instr->opcode == aco_opcode::p_linear_phi)
102                   use_block_idx = block.linear_preds[i];
103 
104                auto it = def_blocks.find(op.tempId());
105                if (it != def_blocks.end()) {
106                   Block& def_block = program->blocks[it->second.first];
107                   Block& use_block = program->blocks[use_block_idx];
108                   bool dominates =
109                      def_block.index == use_block_idx
110                         ? (use_block_idx == block.index ? it->second.second : true)
111                         : (op.regClass().is_linear() ? dominates_linear(def_block, use_block)
112                                                      : dominates_logical(def_block, use_block));
113                   if (!dominates) {
114                      char msg[256];
115                      snprintf(msg, sizeof(msg), "Definition of %%%u does not dominate use",
116                               op.tempId());
117                      check(false, msg, instr.get());
118                   }
119                } else {
120                   char msg[256];
121                   snprintf(msg, sizeof(msg), "%%%u never defined", op.tempId());
122                   check(false, msg, instr.get());
123                }
124             }
125 
126             for (Definition def : instr->definitions) {
127                if (def.isTemp())
128                   def_blocks[def.tempId()].second = true;
129             }
130          }
131       }
132    }
133 
134    for (Block& block : program->blocks) {
135       for (aco_ptr<Instruction>& instr : block.instructions) {
136 
137          if (program->progress < CompilationProgress::after_lower_to_hw) {
138             for (const Operand& op : instr->operands)
139                check(!op.isTemp() || op.regClass() == program->temp_rc[op.tempId()],
140                      "Operand RC not consistent.", instr.get());
141 
142             for (const Definition& def : instr->definitions)
143                check(!def.isTemp() || def.regClass() == program->temp_rc[def.tempId()],
144                      "Definition RC not consistent.", instr.get());
145          }
146 
147          unsigned pck_defs = instr_info.definitions[(int)instr->opcode];
148          unsigned pck_ops = instr_info.operands[(int)instr->opcode];
149 
150          if (pck_defs != 0) {
151             /* Before GFX10 v_cmpx also writes VCC. */
152             if (instr->isVOPC() && program->gfx_level < GFX10 && pck_defs == exec_hi)
153                pck_defs = vcc | (exec_hi << 8);
154 
155             for (unsigned i = 0; i < 4; i++) {
156                uint32_t def = (pck_defs >> (i * 8)) & 0xff;
157                if (def == 0) {
158                   check(i == instr->definitions.size(), "Too many definitions", instr.get());
159                   break;
160                } else {
161                   check(i < instr->definitions.size(), "Too few definitions", instr.get());
162                   if (i >= instr->definitions.size())
163                      break;
164                }
165 
166                if (def == m0) {
167                   check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == m0,
168                         "Definition needs m0", instr.get());
169                } else if (def == scc) {
170                   check(instr->definitions[i].isFixed() && instr->definitions[i].physReg() == scc,
171                         "Definition needs scc", instr.get());
172                } else if (def == exec_hi) {
173                   RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
174                   check(instr->definitions[i].isFixed() &&
175                            instr->definitions[i].physReg() == exec &&
176                            instr->definitions[i].regClass() == rc,
177                         "Definition needs exec", instr.get());
178                } else if (def == exec_lo) {
179                   check(instr->definitions[i].isFixed() &&
180                            instr->definitions[i].physReg() == exec_lo &&
181                            instr->definitions[i].regClass() == s1,
182                         "Definition needs exec_lo", instr.get());
183                } else if (def == vcc) {
184                   check(instr->definitions[i].regClass() == program->lane_mask,
185                         "Definition has to be lane mask", instr.get());
186                   check(!instr->definitions[i].isFixed() ||
187                            instr->definitions[i].physReg() == vcc || instr->isVOP3() ||
188                            instr->isSDWA(),
189                         "Definition has to be vcc", instr.get());
190                } else {
191                   check(instr->definitions[i].size() == def, "Definition has wrong size",
192                         instr.get());
193                }
194             }
195          }
196 
197          if (pck_ops != 0) {
198             for (unsigned i = 0; i < 4; i++) {
199                uint32_t op = (pck_ops >> (i * 8)) & 0xff;
200                if (op == 0) {
201                   check(i == instr->operands.size(), "Too many operands", instr.get());
202                   break;
203                } else {
204                   check(i < instr->operands.size(), "Too few operands", instr.get());
205                   if (i >= instr->operands.size())
206                      break;
207                }
208 
209                if (op == m0) {
210                   check(instr->operands[i].isFixed() && instr->operands[i].physReg() == m0,
211                         "Operand needs m0", instr.get());
212                } else if (op == scc) {
213                   check(instr->operands[i].isFixed() && instr->operands[i].physReg() == scc,
214                         "Operand needs scc", instr.get());
215                } else if (op == exec_hi) {
216                   RegClass rc = instr->isSALU() ? s2 : program->lane_mask;
217                   check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec &&
218                            instr->operands[i].hasRegClass() && instr->operands[i].regClass() == rc,
219                         "Operand needs exec", instr.get());
220                } else if (op == exec_lo) {
221                   check(instr->operands[i].isFixed() && instr->operands[i].physReg() == exec_lo &&
222                            instr->operands[i].hasRegClass() && instr->operands[i].regClass() == s1,
223                         "Operand needs exec_lo", instr.get());
224                } else if (op == vcc) {
225                   check(instr->operands[i].hasRegClass() &&
226                            instr->operands[i].regClass() == program->lane_mask,
227                         "Operand has to be lane mask", instr.get());
228                   check(!instr->operands[i].isFixed() || instr->operands[i].physReg() == vcc ||
229                            instr->isVOP3(),
230                         "Operand has to be vcc", instr.get());
231                } else {
232                   check(instr->operands[i].size() == op ||
233                            (instr->operands[i].isFixed() && instr->operands[i].physReg() >= 128 &&
234                             instr->operands[i].physReg() < 256),
235                         "Operand has wrong size", instr.get());
236                }
237             }
238          }
239 
240          /* check base format */
241          Format base_format = instr->format;
242          base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);
243          base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP16);
244          base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP8);
245          if ((uint32_t)base_format & (uint32_t)Format::VOP1)
246             base_format = Format::VOP1;
247          else if ((uint32_t)base_format & (uint32_t)Format::VOP2)
248             base_format = Format::VOP2;
249          else if ((uint32_t)base_format & (uint32_t)Format::VOPC)
250             base_format = Format::VOPC;
251          else if (base_format == Format::VINTRP) {
252             if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||
253                 instr->opcode == aco_opcode::v_interp_p1lv_f16 ||
254                 instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||
255                 instr->opcode == aco_opcode::v_interp_p2_f16 ||
256                 instr->opcode == aco_opcode::v_interp_p2_hi_f16) {
257                /* v_interp_*_fp16 are considered VINTRP by the compiler but
258                 * they are emitted as VOP3.
259                 */
260                base_format = Format::VOP3;
261             } else {
262                base_format = Format::VINTRP;
263             }
264          }
265          check(base_format == instr_info.format[(int)instr->opcode],
266                "Wrong base format for instruction", instr.get());
267 
268          /* check VOP3 modifiers */
269          if (instr->isVOP3() && withoutDPP(instr->format) != Format::VOP3) {
270             check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
271                      base_format == Format::VOPC || base_format == Format::VINTRP,
272                   "Format cannot have VOP3/VOP3B applied", instr.get());
273          }
274 
275          if (instr->isDPP()) {
276             check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
277                      base_format == Format::VOPC || base_format == Format::VOP3 ||
278                      base_format == Format::VOP3P,
279                   "Format cannot have DPP applied", instr.get());
280             check((!instr->isVOP3() && !instr->isVOP3P()) || program->gfx_level >= GFX11,
281                   "VOP3+DPP is GFX11+ only", instr.get());
282 
283             bool fi =
284                instr->isDPP8() ? instr->dpp8().fetch_inactive : instr->dpp16().fetch_inactive;
285             check(!fi || program->gfx_level >= GFX10, "DPP Fetch-Inactive is GFX10+ only",
286                   instr.get());
287          }
288 
289          /* check SDWA */
290          if (instr->isSDWA()) {
291             check(base_format == Format::VOP2 || base_format == Format::VOP1 ||
292                      base_format == Format::VOPC,
293                   "Format cannot have SDWA applied", instr.get());
294 
295             check(program->gfx_level >= GFX8, "SDWA is GFX8 to GFX10.3 only", instr.get());
296             check(program->gfx_level < GFX11, "SDWA is GFX8 to GFX10.3 only", instr.get());
297 
298             SDWA_instruction& sdwa = instr->sdwa();
299             check(sdwa.omod == 0 || program->gfx_level >= GFX9, "SDWA omod only supported on GFX9+",
300                   instr.get());
301             if (base_format == Format::VOPC) {
302                check(sdwa.clamp == false || program->gfx_level == GFX8,
303                      "SDWA VOPC clamp only supported on GFX8", instr.get());
304                check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||
305                         program->gfx_level >= GFX9,
306                      "SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());
307             } else {
308                const Definition& def = instr->definitions[0];
309                check(def.bytes() <= 4, "SDWA definitions must not be larger than 4 bytes",
310                      instr.get());
311                check(def.bytes() >= sdwa.dst_sel.size() + sdwa.dst_sel.offset(),
312                      "SDWA definition selection size must be at most definition size", instr.get());
313                check(
314                   sdwa.dst_sel.size() == 1 || sdwa.dst_sel.size() == 2 || sdwa.dst_sel.size() == 4,
315                   "SDWA definition selection size must be 1, 2 or 4 bytes", instr.get());
316                check(sdwa.dst_sel.offset() % sdwa.dst_sel.size() == 0, "Invalid selection offset",
317                      instr.get());
318                check(def.bytes() == 4 || def.bytes() == sdwa.dst_sel.size(),
319                      "SDWA dst_sel size must be definition size for subdword definitions",
320                      instr.get());
321                check(def.bytes() == 4 || sdwa.dst_sel.offset() == 0,
322                      "SDWA dst_sel offset must be 0 for subdword definitions", instr.get());
323             }
324 
325             for (unsigned i = 0; i < std::min<unsigned>(2, instr->operands.size()); i++) {
326                const Operand& op = instr->operands[i];
327                check(op.bytes() <= 4, "SDWA operands must not be larger than 4 bytes", instr.get());
328                check(op.bytes() >= sdwa.sel[i].size() + sdwa.sel[i].offset(),
329                      "SDWA operand selection size must be at most operand size", instr.get());
330                check(sdwa.sel[i].size() == 1 || sdwa.sel[i].size() == 2 || sdwa.sel[i].size() == 4,
331                      "SDWA operand selection size must be 1, 2 or 4 bytes", instr.get());
332                check(sdwa.sel[i].offset() % sdwa.sel[i].size() == 0, "Invalid selection offset",
333                      instr.get());
334             }
335             if (instr->operands.size() >= 3) {
336                check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,
337                      "3rd operand must be fixed to vcc with SDWA", instr.get());
338             }
339             if (instr->definitions.size() >= 2) {
340                check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,
341                      "2nd definition must be fixed to vcc with SDWA", instr.get());
342             }
343 
344             const bool sdwa_opcodes =
345                instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&
346                instr->opcode != aco_opcode::v_fmamk_f32 &&
347                instr->opcode != aco_opcode::v_fmaak_f32 &&
348                instr->opcode != aco_opcode::v_fmamk_f16 &&
349                instr->opcode != aco_opcode::v_fmaak_f16 &&
350                instr->opcode != aco_opcode::v_madmk_f32 &&
351                instr->opcode != aco_opcode::v_madak_f32 &&
352                instr->opcode != aco_opcode::v_madmk_f16 &&
353                instr->opcode != aco_opcode::v_madak_f16 &&
354                instr->opcode != aco_opcode::v_readfirstlane_b32 &&
355                instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;
356 
357             const bool feature_mac =
358                program->gfx_level == GFX8 &&
359                (instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);
360 
361             check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());
362          }
363 
364          /* check opsel */
365          if (instr->opcode == aco_opcode::v_permlane16_b32 ||
366              instr->opcode == aco_opcode::v_permlanex16_b32) {
367             check(instr->valu().opsel <= 0x3, "Unexpected opsel for permlane", instr.get());
368          } else if (instr->isVOP3() || instr->isVOP1() || instr->isVOP2() || instr->isVOPC()) {
369             VALU_instruction& valu = instr->valu();
370             check(valu.opsel == 0 || program->gfx_level >= GFX9, "Opsel is only supported on GFX9+",
371                   instr.get());
372             check(valu.opsel == 0 || instr->format == Format::VOP3 || program->gfx_level >= GFX11,
373                   "Opsel is only supported for VOP3 before GFX11", instr.get());
374 
375             for (unsigned i = 0; i < 3; i++) {
376                if (i >= instr->operands.size() ||
377                    (!instr->isVOP3() && !instr->operands[i].isOfType(RegType::vgpr)) ||
378                    (instr->operands[i].hasRegClass() &&
379                     instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))
380                   check(!valu.opsel[i], "Unexpected opsel for operand", instr.get());
381             }
382             if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())
383                check(!valu.opsel[3], "Unexpected opsel for sub-dword definition", instr.get());
384          } else if (instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
385                     instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
386                     instr->opcode == aco_opcode::v_fma_mix_f32) {
387             check(instr->definitions[0].regClass() ==
388                      (instr->opcode == aco_opcode::v_fma_mix_f32 ? v1 : v2b),
389                   "v_fma_mix_f32/v_fma_mix_f16 must have v1/v2b definition", instr.get());
390          } else if (instr->isVOP3P()) {
391             VALU_instruction& vop3p = instr->valu();
392             for (unsigned i = 0; i < instr->operands.size(); i++) {
393                if (instr->operands[i].hasRegClass() &&
394                    instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed())
395                   check(!vop3p.opsel_lo[i] && !vop3p.opsel_hi[i],
396                         "Unexpected opsel for subdword operand", instr.get());
397             }
398             check(instr->definitions[0].regClass() == v1 ||
399                      instr_info.classes[(int)instr->opcode] == instr_class::wmma,
400                   "VOP3P must have v1 definition", instr.get());
401          }
402 
403          /* check for undefs */
404          for (unsigned i = 0; i < instr->operands.size(); i++) {
405             if (instr->operands[i].isUndefined()) {
406                bool flat = instr->isFlatLike();
407                bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||
408                                    instr->opcode == aco_opcode::p_create_vector ||
409                                    instr->opcode == aco_opcode::p_start_linear_vgpr ||
410                                    instr->opcode == aco_opcode::p_jump_to_epilog ||
411                                    instr->opcode == aco_opcode::p_dual_src_export_gfx11 ||
412                                    instr->opcode == aco_opcode::p_end_with_regs ||
413                                    (instr->opcode == aco_opcode::p_interp_gfx11 && i == 0) ||
414                                    (instr->opcode == aco_opcode::p_bpermute_permlane && i == 0) ||
415                                    (flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||
416                                    ((instr->isMUBUF() || instr->isMTBUF()) && i == 1) ||
417                                    (instr->isScratch() && i == 0) || (instr->isDS() && i == 0) ||
418                                    (instr->opcode == aco_opcode::p_init_scratch && i == 0);
419                check(can_be_undef, "Undefs can only be used in certain operands", instr.get());
420             } else {
421                check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||
422                         instr->operands[i].isConstant(),
423                      "Uninitialized Operand", instr.get());
424             }
425          }
426 
427          for (Operand& op : instr->operands) {
428             if (op.isFixed() || !op.hasRegClass() || !op.regClass().is_linear_vgpr() ||
429                 op.isUndefined())
430                continue;
431 
432             /* Only kill linear VGPRs in top-level blocks. Otherwise, we might have to move linear
433              * VGPRs to make space for normal ones and that isn't possible inside control flow. */
434             if (op.isKill()) {
435                check(block.kind & block_kind_top_level,
436                      "Linear VGPR operands must only be killed at top-level blocks", instr.get());
437             }
438          }
439 
440          /* check subdword definitions */
441          for (unsigned i = 0; i < instr->definitions.size(); i++) {
442             if (instr->definitions[i].regClass().is_subdword())
443                check(instr->definitions[i].bytes() <= 4 || instr->isPseudo() || instr->isVMEM(),
444                      "Only Pseudo and VMEM instructions can write subdword registers > 4 bytes",
445                      instr.get());
446          }
447 
448          if ((instr->isSALU() && instr->opcode != aco_opcode::p_constaddr_addlo &&
449               instr->opcode != aco_opcode::p_resumeaddr_addlo) ||
450              instr->isVALU()) {
451             /* check literals */
452             Operand literal(s1);
453             for (unsigned i = 0; i < instr->operands.size(); i++) {
454                Operand op = instr->operands[i];
455                if (!op.isLiteral())
456                   continue;
457 
458                check(!instr->isDPP() && !instr->isSDWA() &&
459                         (!instr->isVOP3() || program->gfx_level >= GFX10) &&
460                         (!instr->isVOP3P() || program->gfx_level >= GFX10),
461                      "Literal applied on wrong instruction format", instr.get());
462 
463                check(literal.isUndefined() || (literal.size() == op.size() &&
464                                                literal.constantValue() == op.constantValue()),
465                      "Only 1 Literal allowed", instr.get());
466                literal = op;
467                check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,
468                      "Wrong source position for Literal argument", instr.get());
469             }
470 
471             /* check num sgprs for VALU */
472             if (instr->isVALU()) {
473                bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64_e64 ||
474                                  instr->opcode == aco_opcode::v_lshlrev_b64 ||
475                                  instr->opcode == aco_opcode::v_lshrrev_b64 ||
476                                  instr->opcode == aco_opcode::v_ashrrev_i64;
477                unsigned const_bus_limit = 1;
478                if (program->gfx_level >= GFX10 && !is_shift64)
479                   const_bus_limit = 2;
480 
481                uint32_t scalar_mask =
482                   instr->isVOP3() || instr->isVOP3P() || instr->isVINTERP_INREG() ? 0x7 : 0x5;
483                if (instr->isSDWA())
484                   scalar_mask = program->gfx_level >= GFX9 ? 0x7 : 0x4;
485                else if (instr->isDPP())
486                   scalar_mask = 0x4;
487 
488                if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||
489                    instr->opcode == aco_opcode::v_readlane_b32 ||
490                    instr->opcode == aco_opcode::v_readlane_b32_e64 ||
491                    instr_info.classes[(int)instr->opcode] ==
492                       instr_class::valu_pseudo_scalar_trans) {
493                   check(instr->definitions[0].regClass().type() == RegType::sgpr,
494                         "Wrong Definition type for VALU instruction", instr.get());
495                } else {
496                   check(instr->definitions[0].regClass().type() == RegType::vgpr,
497                         "Wrong Definition type for VALU instruction", instr.get());
498                }
499 
500                unsigned num_sgprs = 0;
501                unsigned sgpr[] = {0, 0};
502                for (unsigned i = 0; i < instr->operands.size(); i++) {
503                   Operand op = instr->operands[i];
504                   if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||
505                       instr->opcode == aco_opcode::v_readlane_b32 ||
506                       instr->opcode == aco_opcode::v_readlane_b32_e64) {
507                      check(i != 1 || op.isOfType(RegType::sgpr) || op.isConstant(),
508                            "Must be a SGPR or a constant", instr.get());
509                      check(i == 1 || (op.isOfType(RegType::vgpr) && op.bytes() <= 4),
510                            "Wrong Operand type for VALU instruction", instr.get());
511                      continue;
512                   }
513                   if (instr->opcode == aco_opcode::v_permlane16_b32 ||
514                       instr->opcode == aco_opcode::v_permlanex16_b32 ||
515                       instr->opcode == aco_opcode::v_permlane64_b32) {
516                      check(i != 0 || op.isOfType(RegType::vgpr),
517                            "Operand 0 of v_permlane must be VGPR", instr.get());
518                      check(i == 0 || op.isOfType(RegType::sgpr) || op.isConstant(),
519                            "Lane select operands of v_permlane must be SGPR or constant",
520                            instr.get());
521                   }
522 
523                   if (instr->opcode == aco_opcode::v_writelane_b32 ||
524                       instr->opcode == aco_opcode::v_writelane_b32_e64) {
525                      check(i != 2 || (op.isOfType(RegType::vgpr) && op.bytes() <= 4),
526                            "Wrong Operand type for VALU instruction", instr.get());
527                      check(i == 2 || op.isOfType(RegType::sgpr) || op.isConstant(),
528                            "Must be a SGPR or a constant", instr.get());
529                      continue;
530                   }
531                   if (op.isOfType(RegType::sgpr)) {
532                      check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",
533                            instr.get());
534 
535                      if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {
536                         if (num_sgprs < 2)
537                            sgpr[num_sgprs++] = op.tempId();
538                      }
539                   }
540 
541                   if (op.isConstant() && !op.isLiteral())
542                      check(scalar_mask & (1 << i), "Wrong source position for constant argument",
543                            instr.get());
544                }
545                check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,
546                      "Too many SGPRs/literals", instr.get());
547 
548                /* Validate modifiers. */
549                check(!instr->valu().opsel || instr->isVOP3() || instr->isVOP1() ||
550                         instr->isVOP2() || instr->isVOPC() || instr->isVINTERP_INREG(),
551                      "OPSEL set for unsupported instruction format", instr.get());
552                check(!instr->valu().opsel_lo || instr->isVOP3P(),
553                      "OPSEL_LO set for unsupported instruction format", instr.get());
554                check(!instr->valu().opsel_hi || instr->isVOP3P(),
555                      "OPSEL_HI set for unsupported instruction format", instr.get());
556                check(!instr->valu().omod || instr->isVOP3() || instr->isSDWA(),
557                      "OMOD set for unsupported instruction format", instr.get());
558                check(!instr->valu().clamp || instr->isVOP3() || instr->isVOP3P() ||
559                         instr->isSDWA() || instr->isVINTERP_INREG(),
560                      "CLAMP set for unsupported instruction format", instr.get());
561 
562                for (bool abs : instr->valu().abs) {
563                   check(!abs || instr->isVOP3() || instr->isVOP3P() || instr->isSDWA() ||
564                            instr->isDPP16(),
565                         "ABS/NEG_HI set for unsupported instruction format", instr.get());
566                }
567                for (bool neg : instr->valu().neg) {
568                   check(!neg || instr->isVOP3() || instr->isVOP3P() || instr->isSDWA() ||
569                            instr->isDPP16() || instr->isVINTERP_INREG(),
570                         "NEG/NEG_LO set for unsupported instruction format", instr.get());
571                }
572             }
573 
574             if (instr->isSOP1() || instr->isSOP2()) {
575                if (!instr->definitions.empty())
576                   check(instr->definitions[0].regClass().type() == RegType::sgpr,
577                         "Wrong Definition type for SALU instruction", instr.get());
578                for (const Operand& op : instr->operands) {
579                   check(op.isConstant() || op.isOfType(RegType::sgpr),
580                         "Wrong Operand type for SALU instruction", instr.get());
581                }
582             }
583          }
584 
585          switch (instr->format) {
586          case Format::PSEUDO: {
587             if (instr->opcode == aco_opcode::p_create_vector ||
588                 instr->opcode == aco_opcode::p_start_linear_vgpr) {
589                unsigned size = 0;
590                for (const Operand& op : instr->operands) {
591                   check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());
592                   size += op.bytes();
593                }
594                if (!instr->operands.empty() || instr->opcode == aco_opcode::p_create_vector) {
595                   check(size == instr->definitions[0].bytes(),
596                         "Definition size does not match operand sizes", instr.get());
597                }
598                if (instr->definitions[0].regClass().type() == RegType::sgpr) {
599                   for (const Operand& op : instr->operands) {
600                      check(op.isConstant() || op.regClass().type() == RegType::sgpr,
601                            "Wrong Operand type for scalar vector", instr.get());
602                   }
603                }
604                if (instr->opcode == aco_opcode::p_start_linear_vgpr)
605                   check(instr->definitions[0].regClass().is_linear_vgpr(),
606                         "Definition must be linear VGPR", instr.get());
607             } else if (instr->opcode == aco_opcode::p_extract_vector) {
608                check(!instr->operands[0].isConstant() && instr->operands[1].isConstant(),
609                      "Wrong Operand types", instr.get());
610                check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=
611                         instr->operands[0].bytes(),
612                      "Index out of range", instr.get());
613                check(instr->definitions[0].regClass().type() == RegType::vgpr ||
614                         instr->operands[0].regClass().type() == RegType::sgpr,
615                      "Cannot extract SGPR value from VGPR vector", instr.get());
616                check(program->gfx_level >= GFX9 ||
617                         !instr->definitions[0].regClass().is_subdword() ||
618                         instr->operands[0].regClass().type() == RegType::vgpr,
619                      "Cannot extract subdword from SGPR before GFX9+", instr.get());
620             } else if (instr->opcode == aco_opcode::p_split_vector) {
621                check(!instr->operands[0].isConstant(), "Operand must not be constant", instr.get());
622                unsigned size = 0;
623                for (const Definition& def : instr->definitions) {
624                   size += def.bytes();
625                }
626                check(size == instr->operands[0].bytes(),
627                      "Operand size does not match definition sizes", instr.get());
628                if (instr->operands[0].isOfType(RegType::vgpr)) {
629                   for (const Definition& def : instr->definitions)
630                      check(def.regClass().type() == RegType::vgpr,
631                            "Wrong Definition type for VGPR split_vector", instr.get());
632                } else {
633                   for (const Definition& def : instr->definitions)
634                      check(program->gfx_level >= GFX9 || !def.regClass().is_subdword(),
635                            "Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());
636                }
637             } else if (instr->opcode == aco_opcode::p_parallelcopy) {
638                check(instr->definitions.size() == instr->operands.size(),
639                      "Number of Operands does not match number of Definitions", instr.get());
640                for (unsigned i = 0; i < instr->operands.size(); i++) {
641                   check(instr->definitions[i].bytes() == instr->operands[i].bytes(),
642                         "Operand and Definition size must match", instr.get());
643                   if (instr->operands[i].hasRegClass()) {
644                      check((instr->definitions[i].regClass().type() ==
645                             instr->operands[i].regClass().type()) ||
646                               (instr->definitions[i].regClass().type() == RegType::vgpr &&
647                                instr->operands[i].regClass().type() == RegType::sgpr),
648                            "Operand and Definition types do not match", instr.get());
649                      check(instr->definitions[i].regClass().is_linear_vgpr() ==
650                               instr->operands[i].regClass().is_linear_vgpr(),
651                            "Operand and Definition types do not match", instr.get());
652                   } else {
653                      check(!instr->definitions[i].regClass().is_linear_vgpr(),
654                            "Can only copy linear VGPRs into linear VGPRs, not constant/undef",
655                            instr.get());
656                   }
657                }
658             } else if (instr->opcode == aco_opcode::p_phi) {
659                check(instr->operands.size() == block.logical_preds.size(),
660                      "Number of Operands does not match number of predecessors", instr.get());
661                check(instr->definitions[0].regClass().type() == RegType::vgpr,
662                      "Logical Phi Definition must be vgpr", instr.get());
663                for (const Operand& op : instr->operands)
664                   check(instr->definitions[0].size() == op.size(),
665                         "Operand sizes must match Definition size", instr.get());
666             } else if (instr->opcode == aco_opcode::p_linear_phi) {
667                for (const Operand& op : instr->operands) {
668                   check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",
669                         instr.get());
670                   check(instr->definitions[0].size() == op.size(),
671                         "Operand sizes must match Definition size", instr.get());
672                }
673                check(instr->operands.size() == block.linear_preds.size(),
674                      "Number of Operands does not match number of predecessors", instr.get());
675             } else if (instr->opcode == aco_opcode::p_extract ||
676                        instr->opcode == aco_opcode::p_insert) {
677                check(!instr->operands[0].isConstant(), "Data operand must not be constant",
678                      instr.get());
679                check(instr->operands[1].isConstant(), "Index must be constant", instr.get());
680                if (instr->opcode == aco_opcode::p_extract)
681                   check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",
682                         instr.get());
683 
684                check(instr->definitions[0].regClass().type() != RegType::sgpr ||
685                         instr->operands[0].regClass().type() == RegType::sgpr,
686                      "Can't extract/insert VGPR to SGPR", instr.get());
687 
688                if (instr->opcode == aco_opcode::p_insert)
689                   check(instr->operands[0].bytes() == instr->definitions[0].bytes(),
690                         "Sizes of p_insert data operand and definition must match", instr.get());
691 
692                if (instr->definitions[0].regClass().type() == RegType::sgpr)
693                   check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&
694                            instr->definitions[1].physReg() == scc,
695                         "SGPR extract/insert needs an SCC definition", instr.get());
696 
697                unsigned data_bits = instr->operands[0].bytes() * 8u;
698                unsigned op_bits = instr->operands[2].constantValue();
699 
700                if (instr->opcode == aco_opcode::p_insert) {
701                   check(op_bits == 8 || op_bits == 16, "Size must be 8 or 16", instr.get());
702                   check(op_bits < data_bits, "Size must be smaller than source", instr.get());
703                } else if (instr->opcode == aco_opcode::p_extract) {
704                   check(op_bits == 8 || op_bits == 16 || op_bits == 32,
705                         "Size must be 8 or 16 or 32", instr.get());
706                   check(data_bits >= op_bits, "Can't extract more bits than what the data has.",
707                         instr.get());
708                }
709 
710                unsigned comp = data_bits / MAX2(op_bits, 1);
711                check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",
712                      instr.get());
713             } else if (instr->opcode == aco_opcode::p_jump_to_epilog) {
714                check(instr->definitions.size() == 0, "p_jump_to_epilog must have 0 definitions",
715                      instr.get());
716                check(instr->operands.size() > 0 && instr->operands[0].isOfType(RegType::sgpr) &&
717                         instr->operands[0].size() == 2,
718                      "First operand of p_jump_to_epilog must be a SGPR", instr.get());
719                for (unsigned i = 1; i < instr->operands.size(); i++) {
720                   check(instr->operands[i].isOfType(RegType::vgpr) ||
721                            instr->operands[i].isOfType(RegType::sgpr) ||
722                            instr->operands[i].isUndefined(),
723                         "Other operands of p_jump_to_epilog must be VGPRs, SGPRs or undef",
724                         instr.get());
725                }
726             } else if (instr->opcode == aco_opcode::p_dual_src_export_gfx11) {
727                check(instr->definitions.size() == 6,
728                      "p_dual_src_export_gfx11 must have 6 definitions", instr.get());
729                check(instr->definitions[2].regClass() == program->lane_mask,
730                      "Third definition of p_dual_src_export_gfx11 must be a lane mask",
731                      instr.get());
732                check(instr->definitions[3].regClass() == program->lane_mask,
733                      "Fourth definition of p_dual_src_export_gfx11 must be a lane mask",
734                      instr.get());
735                check(instr->definitions[4].physReg() == vcc,
736                      "Fifth definition of p_dual_src_export_gfx11 must be vcc", instr.get());
737                check(instr->definitions[5].physReg() == scc,
738                      "Sixth definition of p_dual_src_export_gfx11 must be scc", instr.get());
739                check(instr->operands.size() == 8, "p_dual_src_export_gfx11 must have 8 operands",
740                      instr.get());
741                for (unsigned i = 0; i < instr->operands.size(); i++) {
742                   check(
743                      instr->operands[i].isOfType(RegType::vgpr) || instr->operands[i].isUndefined(),
744                      "Operands of p_dual_src_export_gfx11 must be VGPRs or undef", instr.get());
745                }
746             }
747             break;
748          }
749          case Format::PSEUDO_REDUCTION: {
750             for (const Operand& op : instr->operands)
751                check(op.regClass().type() == RegType::vgpr,
752                      "All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",
753                      instr.get());
754 
755             if (instr->opcode == aco_opcode::p_reduce &&
756                 instr->reduction().cluster_size == program->wave_size)
757                check(instr->definitions[0].regClass().type() == RegType::sgpr ||
758                         program->wave_size == 32,
759                      "The result of unclustered reductions must go into an SGPR.", instr.get());
760             else
761                check(instr->definitions[0].regClass().type() == RegType::vgpr,
762                      "The result of scans and clustered reductions must go into a VGPR.",
763                      instr.get());
764 
765             break;
766          }
767          case Format::SMEM: {
768             if (instr->operands.size() >= 1)
769                check(instr->operands[0].isOfType(RegType::sgpr), "SMEM operands must be sgpr",
770                      instr.get());
771             if (instr->operands.size() >= 2)
772                check(instr->operands[1].isConstant() || instr->operands[1].isOfType(RegType::sgpr),
773                      "SMEM offset must be constant or sgpr", instr.get());
774             if (!instr->definitions.empty())
775                check(instr->definitions[0].regClass().type() == RegType::sgpr,
776                      "SMEM result must be sgpr", instr.get());
777             break;
778          }
779          case Format::MTBUF:
780          case Format::MUBUF: {
781             check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",
782                   instr.get());
783             check(instr->operands[1].isOfType(RegType::vgpr),
784                   "VADDR must be in vgpr for VMEM instructions", instr.get());
785             check(instr->operands[0].isOfType(RegType::sgpr), "VMEM resource constant must be sgpr",
786                   instr.get());
787             check(instr->operands.size() < 4 || instr->operands[3].isOfType(RegType::vgpr),
788                   "VMEM write data must be vgpr", instr.get());
789             if (instr->operands.size() >= 3 && instr->operands[2].isConstant())
790                check(program->gfx_level < GFX12 || instr->operands[2].constantValue() == 0,
791                      "VMEM SOFFSET must not be non-zero constant on GFX12+", instr.get());
792 
793             const bool d16 =
794                instr->opcode ==
795                   aco_opcode::buffer_load_dword || // FIXME: used to spill subdword variables
796                instr->opcode == aco_opcode::buffer_load_ubyte ||
797                instr->opcode == aco_opcode::buffer_load_sbyte ||
798                instr->opcode == aco_opcode::buffer_load_ushort ||
799                instr->opcode == aco_opcode::buffer_load_sshort ||
800                instr->opcode == aco_opcode::buffer_load_ubyte_d16 ||
801                instr->opcode == aco_opcode::buffer_load_ubyte_d16_hi ||
802                instr->opcode == aco_opcode::buffer_load_sbyte_d16 ||
803                instr->opcode == aco_opcode::buffer_load_sbyte_d16_hi ||
804                instr->opcode == aco_opcode::buffer_load_short_d16 ||
805                instr->opcode == aco_opcode::buffer_load_short_d16_hi ||
806                instr->opcode == aco_opcode::buffer_load_format_d16_x ||
807                instr->opcode == aco_opcode::buffer_load_format_d16_hi_x ||
808                instr->opcode == aco_opcode::buffer_load_format_d16_xy ||
809                instr->opcode == aco_opcode::buffer_load_format_d16_xyz ||
810                instr->opcode == aco_opcode::buffer_load_format_d16_xyzw ||
811                instr->opcode == aco_opcode::tbuffer_load_format_d16_x ||
812                instr->opcode == aco_opcode::tbuffer_load_format_d16_xy ||
813                instr->opcode == aco_opcode::tbuffer_load_format_d16_xyz ||
814                instr->opcode == aco_opcode::tbuffer_load_format_d16_xyzw;
815             if (instr->definitions.size()) {
816                check(instr->definitions[0].regClass().type() == RegType::vgpr,
817                      "VMEM definitions[0] (VDATA) must be VGPR", instr.get());
818                check(d16 || !instr->definitions[0].regClass().is_subdword(),
819                      "Only D16 opcodes can load subdword values.", instr.get());
820                check(instr->definitions[0].bytes() <= 8 || !d16,
821                      "D16 opcodes can only load up to 8 bytes.", instr.get());
822             }
823             break;
824          }
825          case Format::MIMG: {
826             check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",
827                   instr.get());
828             check(instr->operands[0].hasRegClass() &&
829                      (instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),
830                   "MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());
831             if (instr->operands[1].hasRegClass())
832                check(instr->operands[1].regClass() == s4,
833                      "MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());
834             if (!instr->operands[2].isUndefined()) {
835                bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||
836                                  instr->opcode == aco_opcode::image_atomic_fcmpswap;
837                check(instr->definitions.empty() ||
838                         (instr->definitions[0].regClass() == instr->operands[2].regClass() ||
839                          is_cmpswap),
840                      "MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "
841                      "TFE/LWE loads",
842                      instr.get());
843             }
844 
845             if (instr->mimg().strict_wqm) {
846                check(instr->operands[3].hasRegClass() &&
847                         instr->operands[3].regClass().is_linear_vgpr(),
848                      "MIMG operands[3] must be temp linear VGPR.", instr.get());
849 
850                unsigned total_size = 0;
851                for (unsigned i = 4; i < instr->operands.size(); i++) {
852                   check(instr->operands[i].hasRegClass() && instr->operands[i].regClass() == v1,
853                         "MIMG operands[4+] (VADDR) must be v1", instr.get());
854                   total_size += instr->operands[i].bytes();
855                }
856                check(total_size <= instr->operands[3].bytes(),
857                      "MIMG operands[4+] must fit within operands[3].", instr.get());
858             } else {
859                check(instr->operands.size() == 4 || program->gfx_level >= GFX10,
860                      "NSA is only supported on GFX10+", instr.get());
861                for (unsigned i = 3; i < instr->operands.size(); i++) {
862                   check(instr->operands[i].hasRegClass() &&
863                            instr->operands[i].regClass().type() == RegType::vgpr,
864                         "MIMG operands[3+] (VADDR) must be VGPR", instr.get());
865                   if (instr->operands.size() > 4) {
866                      if (program->gfx_level < GFX11) {
867                         check(instr->operands[i].regClass() == v1,
868                               "GFX10 MIMG VADDR must be v1 if NSA is used", instr.get());
869                      } else {
870                         unsigned num_scalar =
871                            program->gfx_level >= GFX12 ? (instr->operands.size() - 4) : 4;
872                         if (instr->opcode != aco_opcode::image_bvh_intersect_ray &&
873                             instr->opcode != aco_opcode::image_bvh64_intersect_ray &&
874                             i < 3 + num_scalar) {
875                            check(instr->operands[i].regClass() == v1,
876                                  "first 4 GFX11 MIMG VADDR must be v1 if NSA is used", instr.get());
877                         }
878                      }
879                   }
880                }
881             }
882 
883             if (instr->definitions.size()) {
884                check(instr->definitions[0].regClass().type() == RegType::vgpr,
885                      "MIMG definitions[0] (VDATA) must be VGPR", instr.get());
886                check(instr->mimg().d16 || !instr->definitions[0].regClass().is_subdword(),
887                      "Only D16 MIMG instructions can load subdword values.", instr.get());
888                check(instr->definitions[0].bytes() <= 8 || !instr->mimg().d16,
889                      "D16 MIMG instructions can only load up to 8 bytes.", instr.get());
890             }
891             break;
892          }
893          case Format::DS: {
894             for (const Operand& op : instr->operands) {
895                check(op.isOfType(RegType::vgpr) || op.physReg() == m0 || op.isUndefined(),
896                      "Only VGPRs are valid DS instruction operands", instr.get());
897             }
898             if (!instr->definitions.empty())
899                check(instr->definitions[0].regClass().type() == RegType::vgpr,
900                      "DS instruction must return VGPR", instr.get());
901             break;
902          }
903          case Format::EXP: {
904             for (unsigned i = 0; i < 4; i++)
905                check(instr->operands[i].isOfType(RegType::vgpr),
906                      "Only VGPRs are valid Export arguments", instr.get());
907             break;
908          }
909          case Format::FLAT:
910             check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",
911                   instr.get());
912             FALLTHROUGH;
913          case Format::GLOBAL:
914             check(instr->operands[0].isOfType(RegType::vgpr), "FLAT/GLOBAL address must be vgpr",
915                   instr.get());
916             FALLTHROUGH;
917          case Format::SCRATCH: {
918             check(instr->operands[0].isOfType(RegType::vgpr),
919                   "FLAT/GLOBAL/SCRATCH address must be undefined or vgpr", instr.get());
920             check(instr->operands[1].isOfType(RegType::sgpr),
921                   "FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());
922             if (instr->format == Format::SCRATCH && program->gfx_level < GFX10_3)
923                check(!instr->operands[0].isUndefined() || !instr->operands[1].isUndefined(),
924                      "SCRATCH must have either SADDR or ADDR operand", instr.get());
925             if (!instr->definitions.empty())
926                check(instr->definitions[0].regClass().type() == RegType::vgpr,
927                      "FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());
928             else
929                check(instr->operands[2].isOfType(RegType::vgpr),
930                      "FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());
931             break;
932          }
933          case Format::LDSDIR: {
934             check(instr->definitions.size() == 1 && instr->definitions[0].regClass() == v1,
935                   "LDSDIR must have an v1 definition", instr.get());
936             check(instr->operands.size() == 1, "LDSDIR must have an operand", instr.get());
937             if (!instr->operands.empty()) {
938                check(instr->operands[0].regClass() == s1, "LDSDIR must have an s1 operand",
939                      instr.get());
940                check(instr->operands[0].isFixed() && instr->operands[0].physReg() == m0,
941                      "LDSDIR must have an operand fixed to m0", instr.get());
942             }
943             break;
944          }
945          default: break;
946          }
947       }
948    }
949 
950    return is_valid;
951 }
952 
953 bool
validate_cfg(Program * program)954 validate_cfg(Program* program)
955 {
956    if (!(debug_flags & DEBUG_VALIDATE_IR))
957       return true;
958 
959    bool is_valid = true;
960    auto check_block = [&program, &is_valid](bool success, const char* msg,
961                                             aco::Block* block) -> void
962    {
963       if (!success) {
964          aco_err(program, "%s: BB%u", msg, block->index);
965          is_valid = false;
966       }
967    };
968 
969    /* validate CFG */
970    for (unsigned i = 0; i < program->blocks.size(); i++) {
971       Block& block = program->blocks[i];
972       check_block(block.index == i, "block.index must match actual index", &block);
973 
974       /* predecessors/successors should be sorted */
975       for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)
976          check_block(block.linear_preds[j] < block.linear_preds[j + 1],
977                      "linear predecessors must be sorted", &block);
978       for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)
979          check_block(block.logical_preds[j] < block.logical_preds[j + 1],
980                      "logical predecessors must be sorted", &block);
981       for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)
982          check_block(block.linear_succs[j] < block.linear_succs[j + 1],
983                      "linear successors must be sorted", &block);
984       for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)
985          check_block(block.logical_succs[j] < block.logical_succs[j + 1],
986                      "logical successors must be sorted", &block);
987 
988       /* critical edges are not allowed */
989       if (block.linear_preds.size() > 1) {
990          for (unsigned pred : block.linear_preds)
991             check_block(program->blocks[pred].linear_succs.size() == 1,
992                         "linear critical edges are not allowed", &program->blocks[pred]);
993          for (unsigned pred : block.logical_preds)
994             check_block(program->blocks[pred].logical_succs.size() == 1,
995                         "logical critical edges are not allowed", &program->blocks[pred]);
996       }
997    }
998 
999    return is_valid;
1000 }
1001 
1002 bool
validate_live_vars(Program * program)1003 validate_live_vars(Program* program)
1004 {
1005    if (!(debug_flags & DEBUG_VALIDATE_LIVE_VARS))
1006       return true;
1007 
1008    bool is_valid = true;
1009    const int prev_num_waves = program->num_waves;
1010    const monotonic_buffer_resource old_memory = std::move(program->live.memory);
1011    const std::vector<IDSet> prev_live_in = std::move(program->live.live_in);
1012    const RegisterDemand prev_max_demand = program->max_reg_demand;
1013    std::vector<RegisterDemand> block_demands(program->blocks.size());
1014    std::vector<RegisterDemand> live_in_demands(program->blocks.size());
1015    std::vector<std::vector<RegisterDemand>> register_demands(program->blocks.size());
1016 
1017    for (unsigned i = 0; i < program->blocks.size(); i++) {
1018       Block& b = program->blocks[i];
1019       block_demands[i] = b.register_demand;
1020       live_in_demands[i] = b.live_in_demand;
1021       register_demands[i].reserve(b.instructions.size());
1022       for (unsigned j = 0; j < b.instructions.size(); j++)
1023          register_demands[i].emplace_back(b.instructions[j]->register_demand);
1024    }
1025 
1026    aco::live_var_analysis(program);
1027 
1028    /* Validate RegisterDemand calculation */
1029    for (unsigned i = 0; i < program->blocks.size(); i++) {
1030       Block& b = program->blocks[i];
1031 
1032       if (!(b.register_demand == block_demands[i])) {
1033          is_valid = false;
1034          aco_err(program,
1035                  "Register Demand not updated correctly for BB%d: got (%3u vgpr, %3u sgpr), but "
1036                  "should be (%3u vgpr, %3u sgpr)",
1037                  i, block_demands[i].vgpr, block_demands[i].sgpr, b.register_demand.vgpr,
1038                  b.register_demand.sgpr);
1039       }
1040       if (!(b.live_in_demand == live_in_demands[i])) {
1041          is_valid = false;
1042          aco_err(program,
1043                  "Live-in Demand not updated correctly for BB%d: got (%3u vgpr, %3u sgpr), but "
1044                  "should be (%3u vgpr, %3u sgpr)",
1045                  i, live_in_demands[i].vgpr, live_in_demands[i].sgpr, b.live_in_demand.vgpr,
1046                  b.live_in_demand.sgpr);
1047       }
1048 
1049       for (unsigned j = 0; j < b.instructions.size(); j++) {
1050          if (b.instructions[j]->register_demand == register_demands[i][j])
1051             continue;
1052 
1053          char* out;
1054          size_t outsize;
1055          struct u_memstream mem;
1056          u_memstream_open(&mem, &out, &outsize);
1057          FILE* const memf = u_memstream_get(&mem);
1058 
1059          fprintf(memf,
1060                  "Register Demand not updated correctly: got (%3u vgpr, %3u sgpr), but should be "
1061                  "(%3u vgpr, %3u sgpr): \n\t",
1062                  register_demands[i][j].vgpr, register_demands[i][j].sgpr,
1063                  b.instructions[j]->register_demand.vgpr, b.instructions[j]->register_demand.sgpr);
1064          aco_print_instr(program->gfx_level, b.instructions[j].get(), memf, print_kill);
1065          u_memstream_close(&mem);
1066 
1067          aco_err(program, "%s", out);
1068          free(out);
1069 
1070          is_valid = false;
1071       }
1072    }
1073    if (!(program->max_reg_demand == prev_max_demand) || program->num_waves != prev_num_waves) {
1074       is_valid = false;
1075       aco_err(program,
1076               "Max Register Demand and Num Waves not updated correctly: got (%3u vgpr, %3u sgpr) "
1077               "and %2u waves, but should be (%3u vgpr, %3u sgpr) and %2u waves",
1078               prev_max_demand.vgpr, prev_max_demand.sgpr, prev_num_waves,
1079               program->max_reg_demand.vgpr, program->max_reg_demand.sgpr, program->num_waves);
1080    }
1081 
1082    /* Validate Live-in sets */
1083    for (unsigned i = 0; i < program->blocks.size(); i++) {
1084       if (prev_live_in[i] != program->live.live_in[i]) {
1085          char* out;
1086          size_t outsize;
1087          struct u_memstream mem;
1088          u_memstream_open(&mem, &out, &outsize);
1089          FILE* const memf = u_memstream_get(&mem);
1090 
1091          fprintf(memf, "Live-in set not updated correctly for BB%d:", i);
1092          fprintf(memf, "\nMissing values: ");
1093          for (unsigned t : program->live.live_in[i]) {
1094             if (prev_live_in[i].count(t) == 0)
1095                fprintf(memf, "%%%d, ", t);
1096          }
1097          fprintf(memf, "\nAdditional values: ");
1098          for (unsigned t : prev_live_in[i]) {
1099             if (program->live.live_in[i].count(t) == 0)
1100                fprintf(memf, "%%%d, ", t);
1101          }
1102          u_memstream_close(&mem);
1103          aco_err(program, "%s", out);
1104          free(out);
1105          is_valid = false;
1106       }
1107    }
1108 
1109    return is_valid;
1110 }
1111 
1112 /* RA validation */
1113 namespace {
1114 
1115 struct Location {
Locationaco::__anon701c7ea20311::Location1116    Location() : block(NULL), instr(NULL) {}
1117 
1118    Block* block;
1119    Instruction* instr; // NULL if it's the block's live-in
1120 };
1121 
1122 struct Assignment {
1123    Location defloc;
1124    Location firstloc;
1125    PhysReg reg;
1126    bool valid;
1127 };
1128 
1129 bool
ra_fail(Program * program,Location loc,Location loc2,const char * fmt,...)1130 ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)
1131 {
1132    va_list args;
1133    va_start(args, fmt);
1134    char msg[1024];
1135    vsprintf(msg, fmt, args);
1136    va_end(args);
1137 
1138    char* out;
1139    size_t outsize;
1140    struct u_memstream mem;
1141    u_memstream_open(&mem, &out, &outsize);
1142    FILE* const memf = u_memstream_get(&mem);
1143 
1144    fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);
1145    if (loc.instr) {
1146       aco_print_instr(program->gfx_level, loc.instr, memf);
1147       fprintf(memf, "\n%s", msg);
1148    } else {
1149       fprintf(memf, "%s", msg);
1150    }
1151    if (loc2.block) {
1152       fprintf(memf, " in BB%d:\n", loc2.block->index);
1153       aco_print_instr(program->gfx_level, loc2.instr, memf);
1154    }
1155    fprintf(memf, "\n\n");
1156    u_memstream_close(&mem);
1157 
1158    aco_err(program, "%s", out);
1159    free(out);
1160 
1161    return true;
1162 }
1163 
1164 bool
validate_subdword_operand(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr,unsigned index)1165 validate_subdword_operand(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr,
1166                           unsigned index)
1167 {
1168    Operand op = instr->operands[index];
1169    unsigned byte = op.physReg().byte();
1170 
1171    if (instr->opcode == aco_opcode::p_as_uniform)
1172       return byte == 0;
1173    if (instr->isPseudo() && gfx_level >= GFX8)
1174       return true;
1175    if (instr->isSDWA())
1176       return byte + instr->sdwa().sel[index].offset() + instr->sdwa().sel[index].size() <= 4 &&
1177              byte % instr->sdwa().sel[index].size() == 0;
1178    if (instr->isVOP3P()) {
1179       bool fma_mix = instr->opcode == aco_opcode::v_fma_mixlo_f16 ||
1180                      instr->opcode == aco_opcode::v_fma_mixhi_f16 ||
1181                      instr->opcode == aco_opcode::v_fma_mix_f32;
1182       return instr->valu().opsel_lo[index] == (byte >> 1) &&
1183              instr->valu().opsel_hi[index] == (fma_mix || (byte >> 1));
1184    }
1185    if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, index))
1186       return true;
1187 
1188    switch (instr->opcode) {
1189    case aco_opcode::v_cvt_f32_ubyte1:
1190       if (byte == 1)
1191          return true;
1192       break;
1193    case aco_opcode::v_cvt_f32_ubyte2:
1194       if (byte == 2)
1195          return true;
1196       break;
1197    case aco_opcode::v_cvt_f32_ubyte3:
1198       if (byte == 3)
1199          return true;
1200       break;
1201    case aco_opcode::ds_write_b8_d16_hi:
1202    case aco_opcode::ds_write_b16_d16_hi:
1203       if (byte == 2 && index == 1)
1204          return true;
1205       break;
1206    case aco_opcode::buffer_store_byte_d16_hi:
1207    case aco_opcode::buffer_store_short_d16_hi:
1208    case aco_opcode::buffer_store_format_d16_hi_x:
1209       if (byte == 2 && index == 3)
1210          return true;
1211       break;
1212    case aco_opcode::flat_store_byte_d16_hi:
1213    case aco_opcode::flat_store_short_d16_hi:
1214    case aco_opcode::scratch_store_byte_d16_hi:
1215    case aco_opcode::scratch_store_short_d16_hi:
1216    case aco_opcode::global_store_byte_d16_hi:
1217    case aco_opcode::global_store_short_d16_hi:
1218       if (byte == 2 && index == 2)
1219          return true;
1220       break;
1221    default: break;
1222    }
1223 
1224    return byte == 0;
1225 }
1226 
1227 bool
validate_subdword_definition(amd_gfx_level gfx_level,const aco_ptr<Instruction> & instr)1228 validate_subdword_definition(amd_gfx_level gfx_level, const aco_ptr<Instruction>& instr)
1229 {
1230    Definition def = instr->definitions[0];
1231    unsigned byte = def.physReg().byte();
1232 
1233    if (instr->isPseudo() && gfx_level >= GFX8)
1234       return true;
1235    if (instr->isSDWA())
1236       return byte + instr->sdwa().dst_sel.offset() + instr->sdwa().dst_sel.size() <= 4 &&
1237              byte % instr->sdwa().dst_sel.size() == 0;
1238    if (byte == 2 && can_use_opsel(gfx_level, instr->opcode, -1))
1239       return true;
1240 
1241    switch (instr->opcode) {
1242    case aco_opcode::v_interp_p2_hi_f16:
1243    case aco_opcode::v_fma_mixhi_f16:
1244    case aco_opcode::buffer_load_ubyte_d16_hi:
1245    case aco_opcode::buffer_load_sbyte_d16_hi:
1246    case aco_opcode::buffer_load_short_d16_hi:
1247    case aco_opcode::buffer_load_format_d16_hi_x:
1248    case aco_opcode::flat_load_ubyte_d16_hi:
1249    case aco_opcode::flat_load_short_d16_hi:
1250    case aco_opcode::scratch_load_ubyte_d16_hi:
1251    case aco_opcode::scratch_load_short_d16_hi:
1252    case aco_opcode::global_load_ubyte_d16_hi:
1253    case aco_opcode::global_load_short_d16_hi:
1254    case aco_opcode::ds_read_u8_d16_hi:
1255    case aco_opcode::ds_read_u16_d16_hi: return byte == 2;
1256    case aco_opcode::p_v_cvt_pk_u8_f32: return true;
1257    default: break;
1258    }
1259 
1260    return byte == 0;
1261 }
1262 
1263 unsigned
get_subdword_bytes_written(Program * program,const aco_ptr<Instruction> & instr,unsigned index)1264 get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)
1265 {
1266    amd_gfx_level gfx_level = program->gfx_level;
1267    Definition def = instr->definitions[index];
1268 
1269    if (instr->isPseudo())
1270       return gfx_level >= GFX8 ? def.bytes() : def.size() * 4u;
1271    if (instr->isVALU() || instr->isVINTRP()) {
1272       assert(def.bytes() <= 2);
1273       if (instr->opcode == aco_opcode::p_v_cvt_pk_u8_f32)
1274          return 1;
1275 
1276       if (instr->isSDWA())
1277          return instr->sdwa().dst_sel.size();
1278 
1279       if (instr_is_16bit(gfx_level, instr->opcode))
1280          return 2;
1281 
1282       return 4;
1283    }
1284 
1285    if (instr->isMIMG()) {
1286       assert(instr->mimg().d16);
1287       return program->dev.sram_ecc_enabled ? def.size() * 4u : def.bytes();
1288    }
1289 
1290    switch (instr->opcode) {
1291    case aco_opcode::buffer_load_ubyte_d16:
1292    case aco_opcode::buffer_load_sbyte_d16:
1293    case aco_opcode::buffer_load_short_d16:
1294    case aco_opcode::buffer_load_format_d16_x:
1295    case aco_opcode::tbuffer_load_format_d16_x:
1296    case aco_opcode::flat_load_ubyte_d16:
1297    case aco_opcode::flat_load_short_d16:
1298    case aco_opcode::scratch_load_ubyte_d16:
1299    case aco_opcode::scratch_load_short_d16:
1300    case aco_opcode::global_load_ubyte_d16:
1301    case aco_opcode::global_load_short_d16:
1302    case aco_opcode::ds_read_u8_d16:
1303    case aco_opcode::ds_read_u16_d16:
1304    case aco_opcode::buffer_load_ubyte_d16_hi:
1305    case aco_opcode::buffer_load_sbyte_d16_hi:
1306    case aco_opcode::buffer_load_short_d16_hi:
1307    case aco_opcode::buffer_load_format_d16_hi_x:
1308    case aco_opcode::flat_load_ubyte_d16_hi:
1309    case aco_opcode::flat_load_short_d16_hi:
1310    case aco_opcode::scratch_load_ubyte_d16_hi:
1311    case aco_opcode::scratch_load_short_d16_hi:
1312    case aco_opcode::global_load_ubyte_d16_hi:
1313    case aco_opcode::global_load_short_d16_hi:
1314    case aco_opcode::ds_read_u8_d16_hi:
1315    case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;
1316    case aco_opcode::buffer_load_format_d16_xyz:
1317    case aco_opcode::tbuffer_load_format_d16_xyz: return program->dev.sram_ecc_enabled ? 8 : 6;
1318    default: return def.size() * 4;
1319    }
1320 }
1321 
1322 bool
validate_instr_defs(Program * program,std::array<unsigned,2048> & regs,const std::vector<Assignment> & assignments,const Location & loc,aco_ptr<Instruction> & instr)1323 validate_instr_defs(Program* program, std::array<unsigned, 2048>& regs,
1324                     const std::vector<Assignment>& assignments, const Location& loc,
1325                     aco_ptr<Instruction>& instr)
1326 {
1327    bool err = false;
1328 
1329    for (unsigned i = 0; i < instr->definitions.size(); i++) {
1330       Definition& def = instr->definitions[i];
1331       if (!def.isTemp())
1332          continue;
1333       Temp tmp = def.getTemp();
1334       PhysReg reg = assignments[tmp.id()].reg;
1335       for (unsigned j = 0; j < tmp.bytes(); j++) {
1336          if (regs[reg.reg_b + j])
1337             err |=
1338                ra_fail(program, loc, assignments[regs[reg.reg_b + j]].defloc,
1339                        "Assignment of element %d of %%%d already taken by %%%d from instruction", i,
1340                        tmp.id(), regs[reg.reg_b + j]);
1341          regs[reg.reg_b + j] = tmp.id();
1342       }
1343       if (def.regClass().is_subdword() && def.bytes() < 4) {
1344          unsigned written = get_subdword_bytes_written(program, instr, i);
1345          /* If written=4, the instruction still might write the upper half. In that case, it's
1346           * the lower half that isn't preserved */
1347          for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {
1348             unsigned written_reg = reg.reg() * 4u + j;
1349             if (regs[written_reg] && regs[written_reg] != def.tempId())
1350                err |= ra_fail(program, loc, assignments[regs[written_reg]].defloc,
1351                               "Assignment of element %d of %%%d overwrites the full register "
1352                               "taken by %%%d from instruction",
1353                               i, tmp.id(), regs[written_reg]);
1354          }
1355       }
1356    }
1357 
1358    for (const Definition& def : instr->definitions) {
1359       if (!def.isTemp())
1360          continue;
1361       if (def.isKill()) {
1362          for (unsigned j = 0; j < def.getTemp().bytes(); j++)
1363             regs[def.physReg().reg_b + j] = 0;
1364       }
1365    }
1366 
1367    return err;
1368 }
1369 
1370 } /* end namespace */
1371 
1372 bool
validate_ra(Program * program)1373 validate_ra(Program* program)
1374 {
1375    if (!(debug_flags & DEBUG_VALIDATE_RA))
1376       return false;
1377 
1378    bool err = false;
1379    aco::live_var_analysis(program);
1380    std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());
1381    uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);
1382 
1383    std::vector<Assignment> assignments(program->peekAllocationId());
1384    for (Block& block : program->blocks) {
1385       Location loc;
1386       loc.block = &block;
1387       for (aco_ptr<Instruction>& instr : block.instructions) {
1388          if (instr->opcode == aco_opcode::p_phi) {
1389             for (unsigned i = 0; i < instr->operands.size(); i++) {
1390                if (instr->operands[i].isTemp() &&
1391                    instr->operands[i].getTemp().type() == RegType::sgpr &&
1392                    instr->operands[i].isFirstKill())
1393                   phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());
1394             }
1395          }
1396 
1397          loc.instr = instr.get();
1398          for (unsigned i = 0; i < instr->operands.size(); i++) {
1399             Operand& op = instr->operands[i];
1400             if (!op.isTemp())
1401                continue;
1402             if (!op.isFixed())
1403                err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);
1404             if (assignments[op.tempId()].valid && assignments[op.tempId()].reg != op.physReg())
1405                err |=
1406                   ra_fail(program, loc, assignments[op.tempId()].firstloc,
1407                           "Operand %d has an inconsistent register assignment with instruction", i);
1408             if ((op.getTemp().type() == RegType::vgpr &&
1409                  op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||
1410                 (op.getTemp().type() == RegType::sgpr &&
1411                  op.physReg() + op.size() > program->config->num_sgprs &&
1412                  op.physReg() < sgpr_limit))
1413                err |= ra_fail(program, loc, assignments[op.tempId()].firstloc,
1414                               "Operand %d has an out-of-bounds register assignment", i);
1415             if (op.physReg() == vcc && !program->needs_vcc)
1416                err |= ra_fail(program, loc, Location(),
1417                               "Operand %d fixed to vcc but needs_vcc=false", i);
1418             if (op.regClass().is_subdword() &&
1419                 !validate_subdword_operand(program->gfx_level, instr, i))
1420                err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);
1421             if (!assignments[op.tempId()].firstloc.block)
1422                assignments[op.tempId()].firstloc = loc;
1423             if (!assignments[op.tempId()].defloc.block) {
1424                assignments[op.tempId()].reg = op.physReg();
1425                assignments[op.tempId()].valid = true;
1426             }
1427          }
1428 
1429          for (unsigned i = 0; i < instr->definitions.size(); i++) {
1430             Definition& def = instr->definitions[i];
1431             if (!def.isTemp())
1432                continue;
1433             if (!def.isFixed())
1434                err |=
1435                   ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);
1436             if (assignments[def.tempId()].defloc.block)
1437                err |= ra_fail(program, loc, assignments[def.tempId()].defloc,
1438                               "Temporary %%%d also defined by instruction", def.tempId());
1439             if ((def.getTemp().type() == RegType::vgpr &&
1440                  def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||
1441                 (def.getTemp().type() == RegType::sgpr &&
1442                  def.physReg() + def.size() > program->config->num_sgprs &&
1443                  def.physReg() < sgpr_limit))
1444                err |= ra_fail(program, loc, assignments[def.tempId()].firstloc,
1445                               "Definition %d has an out-of-bounds register assignment", i);
1446             if (def.physReg() == vcc && !program->needs_vcc)
1447                err |= ra_fail(program, loc, Location(),
1448                               "Definition %d fixed to vcc but needs_vcc=false", i);
1449             if (def.regClass().is_subdword() &&
1450                 !validate_subdword_definition(program->gfx_level, instr))
1451                err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);
1452             if (!assignments[def.tempId()].firstloc.block)
1453                assignments[def.tempId()].firstloc = loc;
1454             assignments[def.tempId()].defloc = loc;
1455             assignments[def.tempId()].reg = def.physReg();
1456             assignments[def.tempId()].valid = true;
1457          }
1458       }
1459    }
1460 
1461    for (Block& block : program->blocks) {
1462       Location loc;
1463       loc.block = &block;
1464 
1465       std::array<unsigned, 2048> regs; /* register file in bytes */
1466       regs.fill(0);
1467 
1468       /* check live in */
1469       for (unsigned id : program->live.live_in[block.index]) {
1470          Temp tmp(id, program->temp_rc[id]);
1471          PhysReg reg = assignments[id].reg;
1472          for (unsigned i = 0; i < tmp.bytes(); i++) {
1473             if (regs[reg.reg_b + i]) {
1474                err |= ra_fail(program, loc, Location(),
1475                               "Assignment of element %d of %%%d already taken by %%%d in live-in",
1476                               i, id, regs[reg.reg_b + i]);
1477             }
1478             regs[reg.reg_b + i] = id;
1479          }
1480       }
1481 
1482       for (aco_ptr<Instruction>& instr : block.instructions) {
1483          loc.instr = instr.get();
1484 
1485          /* remove killed p_phi operands from regs */
1486          if (instr->opcode == aco_opcode::p_logical_end) {
1487             for (Temp tmp : phi_sgpr_ops[block.index]) {
1488                PhysReg reg = assignments[tmp.id()].reg;
1489                for (unsigned i = 0; i < tmp.bytes(); i++)
1490                   regs[reg.reg_b + i] = 0;
1491             }
1492          }
1493 
1494          if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {
1495             for (const Operand& op : instr->operands) {
1496                if (!op.isTemp())
1497                   continue;
1498                if (op.isFirstKillBeforeDef()) {
1499                   for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1500                      regs[op.physReg().reg_b + j] = 0;
1501                }
1502             }
1503          }
1504 
1505          if (!instr->isBranch() || block.linear_succs.size() != 1)
1506             err |= validate_instr_defs(program, regs, assignments, loc, instr);
1507 
1508          if (!is_phi(instr)) {
1509             for (const Operand& op : instr->operands) {
1510                if (!op.isTemp())
1511                   continue;
1512                if (op.isLateKill() && op.isFirstKill()) {
1513                   for (unsigned j = 0; j < op.getTemp().bytes(); j++)
1514                      regs[op.physReg().reg_b + j] = 0;
1515                }
1516             }
1517          } else if (block.linear_preds.size() != 1 ||
1518                     program->blocks[block.linear_preds[0]].linear_succs.size() == 1) {
1519             for (unsigned pred : block.linear_preds) {
1520                aco_ptr<Instruction>& br = program->blocks[pred].instructions.back();
1521                assert(br->isBranch());
1522                err |= validate_instr_defs(program, regs, assignments, loc, br);
1523             }
1524          }
1525       }
1526    }
1527 
1528    return err;
1529 }
1530 } // namespace aco
1531