xref: /aosp_15_r20/external/mesa3d/src/broadcom/qpu/qpu_pack.c (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright © 2016 Broadcom
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21  * IN THE SOFTWARE.
22  */
23 
24 #include <string.h>
25 #include "util/macros.h"
26 #include "util/bitscan.h"
27 
28 #include "broadcom/common/v3d_device_info.h"
29 #include "qpu_instr.h"
30 
#ifndef QPU_MASK
/* Builds a 64-bit mask covering bits [high:low], inclusive. */
#define QPU_MASK(high, low) ((((uint64_t)1<<((high)-(low)+1))-1)<<(low))
/* Using the GNU statement expression extension */
/* Shifts value into place for the named field, asserting it fits. */
#define QPU_SET_FIELD(value, field)                                       \
        ({                                                                \
                uint64_t fieldval = (uint64_t)(value) << field ## _SHIFT; \
                assert((fieldval & ~ field ## _MASK) == 0);               \
                fieldval & field ## _MASK;                                \
         })

/* Extracts the named field from a 64-bit instruction word. */
#define QPU_GET_FIELD(word, field) ((uint32_t)(((word)  & field ## _MASK) >> field ## _SHIFT))

/* Replaces the named field's bits in inst with the packed value. */
#define QPU_UPDATE_FIELD(inst, value, field)                              \
        (((inst) & ~(field ## _MASK)) | QPU_SET_FIELD(value, field))
#endif /* QPU_MASK */
46 
/* Bit layout of the 64-bit QPU instruction word.  Each field is described
 * by a _SHIFT (bit position of its LSB) and a _MASK covering its bits.
 * Branch instructions reuse some of the same bit ranges with different
 * meanings (the V3D_QPU_BRANCH_* fields below).
 */
#define V3D_QPU_OP_MUL_SHIFT                58
#define V3D_QPU_OP_MUL_MASK                 QPU_MASK(63, 58)

#define V3D_QPU_SIG_SHIFT                   53
#define V3D_QPU_SIG_MASK                    QPU_MASK(57, 53)

#define V3D_QPU_COND_SHIFT                  46
#define V3D_QPU_COND_MASK                   QPU_MASK(52, 46)
#define V3D_QPU_COND_SIG_MAGIC_ADDR         (1 << 6)

#define V3D_QPU_MM                          QPU_MASK(45, 45)
#define V3D_QPU_MA                          QPU_MASK(44, 44)

#define V3D_QPU_WADDR_M_SHIFT               38
#define V3D_QPU_WADDR_M_MASK                QPU_MASK(43, 38)

#define V3D_QPU_BRANCH_ADDR_LOW_SHIFT       35
#define V3D_QPU_BRANCH_ADDR_LOW_MASK        QPU_MASK(55, 35)

#define V3D_QPU_WADDR_A_SHIFT               32
#define V3D_QPU_WADDR_A_MASK                QPU_MASK(37, 32)

#define V3D_QPU_BRANCH_COND_SHIFT           32
#define V3D_QPU_BRANCH_COND_MASK            QPU_MASK(34, 32)

#define V3D_QPU_BRANCH_ADDR_HIGH_SHIFT      24
#define V3D_QPU_BRANCH_ADDR_HIGH_MASK       QPU_MASK(31, 24)

#define V3D_QPU_OP_ADD_SHIFT                24
#define V3D_QPU_OP_ADD_MASK                 QPU_MASK(31, 24)

#define V3D_QPU_MUL_B_SHIFT                 21
#define V3D_QPU_MUL_B_MASK                  QPU_MASK(23, 21)

#define V3D_QPU_BRANCH_MSFIGN_SHIFT         21
#define V3D_QPU_BRANCH_MSFIGN_MASK          QPU_MASK(22, 21)

#define V3D_QPU_MUL_A_SHIFT                 18
#define V3D_QPU_MUL_A_MASK                  QPU_MASK(20, 18)

#define V3D_QPU_RADDR_C_SHIFT               18
#define V3D_QPU_RADDR_C_MASK                QPU_MASK(23, 18)

#define V3D_QPU_ADD_B_SHIFT                 15
#define V3D_QPU_ADD_B_MASK                  QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_BDU_SHIFT            15
#define V3D_QPU_BRANCH_BDU_MASK             QPU_MASK(17, 15)

#define V3D_QPU_BRANCH_UB                   QPU_MASK(14, 14)

#define V3D_QPU_ADD_A_SHIFT                 12
#define V3D_QPU_ADD_A_MASK                  QPU_MASK(14, 12)

#define V3D_QPU_BRANCH_BDI_SHIFT            12
#define V3D_QPU_BRANCH_BDI_MASK             QPU_MASK(13, 12)

#define V3D_QPU_RADDR_D_SHIFT               12
#define V3D_QPU_RADDR_D_MASK                QPU_MASK(17, 12)

#define V3D_QPU_RADDR_A_SHIFT               6
#define V3D_QPU_RADDR_A_MASK                QPU_MASK(11, 6)

#define V3D_QPU_RADDR_B_SHIFT               0
#define V3D_QPU_RADDR_B_MASK                QPU_MASK(5, 0)
112 
/* Shorthand designated initializers for struct v3d_qpu_sig, used to keep
 * the signal-map tables below readable.
 */
#define THRSW .thrsw = true
#define LDUNIF .ldunif = true
#define LDUNIFRF .ldunifrf = true
#define LDUNIFA .ldunifa = true
#define LDUNIFARF .ldunifarf = true
#define LDTMU .ldtmu = true
#define LDVARY .ldvary = true
#define LDVPM .ldvpm = true
#define LDTLB .ldtlb = true
#define LDTLBU .ldtlbu = true
#define UCB .ucb = true
#define ROT .rotate = true
#define WRTMUC .wrtmuc = true
#define SMIMM_A .small_imm_a = true
#define SMIMM_B .small_imm_b = true
#define SMIMM_C .small_imm_c = true
#define SMIMM_D .small_imm_d = true
130 
/* Signal map for v3d 4.x: unpacked signal state indexed by the packed
 * 5-bit SIG field.  Entries left zero (other than index 0) are reserved
 * encodings; the unpack helper rejects them.
 */
static const struct v3d_qpu_sig v3d42_sig_map[] = {
        /*      MISC       phys    R5 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM_B,    LDVARY       },
        [15] = { SMIMM_B,                 },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        [23] = { ROT,                     },
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        /* 26-30 reserved */
        [31] = { SMIMM_B,          LDTMU, },
};
162 
163 
/* Signal map for v3d 7.1+.  Same indexing scheme as v3d42_sig_map; note
 * the extra small-immediate signals (SMIMM_A/C/D) and LDTMU+WRTMUC combos.
 */
static const struct v3d_qpu_sig v3d71_sig_map[] = {
        /*      MISC       phys    RF0 */
        [0]  = {                          },
        [1]  = { THRSW,                   },
        [2]  = {                   LDUNIF },
        [3]  = { THRSW,            LDUNIF },
        [4]  = {           LDTMU,         },
        [5]  = { THRSW,    LDTMU,         },
        [6]  = {           LDTMU,  LDUNIF },
        [7]  = { THRSW,    LDTMU,  LDUNIF },
        [8]  = {           LDVARY,        },
        [9]  = { THRSW,    LDVARY,        },
        [10] = {           LDVARY, LDUNIF },
        [11] = { THRSW,    LDVARY, LDUNIF },
        [12] = { LDUNIFRF                 },
        [13] = { THRSW,    LDUNIFRF       },
        [14] = { SMIMM_A,                 },
        [15] = { SMIMM_B,                 },
        [16] = {           LDTLB,         },
        [17] = {           LDTLBU,        },
        [18] = {                          WRTMUC },
        [19] = { THRSW,                   WRTMUC },
        [20] = {           LDVARY,        WRTMUC },
        [21] = { THRSW,    LDVARY,        WRTMUC },
        [22] = { UCB,                     },
        /* 23 reserved */
        [24] = {                   LDUNIFA},
        [25] = { LDUNIFARF                },
        [26] = {           LDTMU,         WRTMUC },
        [27] = { THRSW,    LDTMU,         WRTMUC },
        /* 28-29 reserved */
        [30] = { SMIMM_C,                 },
        [31] = { SMIMM_D,                 },
};
198 
199 bool
v3d_qpu_sig_unpack(const struct v3d_device_info * devinfo,uint32_t packed_sig,struct v3d_qpu_sig * sig)200 v3d_qpu_sig_unpack(const struct v3d_device_info *devinfo,
201                    uint32_t packed_sig,
202                    struct v3d_qpu_sig *sig)
203 {
204         if (packed_sig >= ARRAY_SIZE(v3d42_sig_map))
205                 return false;
206 
207         if (devinfo->ver >= 71)
208                 *sig = v3d71_sig_map[packed_sig];
209         else
210                 *sig = v3d42_sig_map[packed_sig];
211 
212         /* Signals with zeroed unpacked contents after element 0 are reserved. */
213         return (packed_sig == 0 ||
214                 memcmp(sig, &v3d42_sig_map[0], sizeof(*sig)) != 0);
215 }
216 
217 bool
v3d_qpu_sig_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_sig * sig,uint32_t * packed_sig)218 v3d_qpu_sig_pack(const struct v3d_device_info *devinfo,
219                  const struct v3d_qpu_sig *sig,
220                  uint32_t *packed_sig)
221 {
222         static const struct v3d_qpu_sig *map;
223 
224         if (devinfo->ver >= 71)
225                 map = v3d71_sig_map;
226         else
227                 map = v3d42_sig_map;
228 
229         for (int i = 0; i < ARRAY_SIZE(v3d42_sig_map); i++) {
230                 if (memcmp(&map[i], sig, sizeof(*sig)) == 0) {
231                         *packed_sig = i;
232                         return true;
233                 }
234         }
235 
236         return false;
237 }
238 
/* The 48 encodable "small immediate" values, indexed by the packed
 * small-immediate field: 0..15, then -16..-1 (stored as 32-bit two's
 * complement), then float bit patterns for the powers of two 2^-8..2^7.
 */
static const uint32_t small_immediates[] = {
        0, 1, 2, 3,
        4, 5, 6, 7,
        8, 9, 10, 11,
        12, 13, 14, 15,
        -16, -15, -14, -13,
        -12, -11, -10, -9,
        -8, -7, -6, -5,
        -4, -3, -2, -1,
        0x3b800000, /* 2.0^-8 */
        0x3c000000, /* 2.0^-7 */
        0x3c800000, /* 2.0^-6 */
        0x3d000000, /* 2.0^-5 */
        0x3d800000, /* 2.0^-4 */
        0x3e000000, /* 2.0^-3 */
        0x3e800000, /* 2.0^-2 */
        0x3f000000, /* 2.0^-1 */
        0x3f800000, /* 2.0^0 */
        0x40000000, /* 2.0^1 */
        0x40800000, /* 2.0^2 */
        0x41000000, /* 2.0^3 */
        0x41800000, /* 2.0^4 */
        0x42000000, /* 2.0^5 */
        0x42800000, /* 2.0^6 */
        0x43000000, /* 2.0^7 */
};
265 
266 bool
v3d_qpu_small_imm_unpack(const struct v3d_device_info * devinfo,uint32_t packed_small_immediate,uint32_t * small_immediate)267 v3d_qpu_small_imm_unpack(const struct v3d_device_info *devinfo,
268                          uint32_t packed_small_immediate,
269                          uint32_t *small_immediate)
270 {
271         if (packed_small_immediate >= ARRAY_SIZE(small_immediates))
272                 return false;
273 
274         *small_immediate = small_immediates[packed_small_immediate];
275         return true;
276 }
277 
278 bool
v3d_qpu_small_imm_pack(const struct v3d_device_info * devinfo,uint32_t value,uint32_t * packed_small_immediate)279 v3d_qpu_small_imm_pack(const struct v3d_device_info *devinfo,
280                        uint32_t value,
281                        uint32_t *packed_small_immediate)
282 {
283         STATIC_ASSERT(ARRAY_SIZE(small_immediates) == 48);
284 
285         for (int i = 0; i < ARRAY_SIZE(small_immediates); i++) {
286                 if (small_immediates[i] == value) {
287                         *packed_small_immediate = i;
288                         return true;
289                 }
290         }
291 
292         return false;
293 }
294 
/* Decodes the 7-bit COND field into per-ALU condition and flag state.
 *
 * Returns false only for the reserved encoding 0x10; every other value
 * maps to some combination of add/mul ALU conditions (ac/mc), pushed
 * flags (apf/mpf) and updated flags (auf/muf).
 */
bool
v3d_qpu_flags_unpack(const struct v3d_device_info *devinfo,
                     uint32_t packed_cond,
                     struct v3d_qpu_flags *cond)
{
        /* Maps the 2-bit condition subfield to the unpacked enum. */
        static const enum v3d_qpu_cond cond_map[4] = {
                [0] = V3D_QPU_COND_IFA,
                [1] = V3D_QPU_COND_IFB,
                [2] = V3D_QPU_COND_IFNA,
                [3] = V3D_QPU_COND_IFNB,
        };

        /* Start from the all-clear state and fill in what's encoded. */
        cond->ac = V3D_QPU_COND_NONE;
        cond->mc = V3D_QPU_COND_NONE;
        cond->apf = V3D_QPU_PF_NONE;
        cond->mpf = V3D_QPU_PF_NONE;
        cond->auf = V3D_QPU_UF_NONE;
        cond->muf = V3D_QPU_UF_NONE;

        if (packed_cond == 0) {
                /* 0x00: nothing encoded. */
                return true;
        } else if (packed_cond >> 2 == 0) {
                /* 0x01-0x03: add-ALU pushed flags only. */
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0) {
                /* 0x04-0x0f: add-ALU flag update; encodings start at 4,
                 * hence the rebias onto V3D_QPU_UF_ANDZ.
                 */
                cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond == 0x10) {
                /* Reserved encoding. */
                return false;
        } else if (packed_cond >> 2 == 0x4) {
                /* 0x11-0x13: mul-ALU pushed flags only (0x10 was caught
                 * above).
                 */
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x1) {
                /* 0x14-0x1f: mul-ALU flag update, rebias as for auf. */
                cond->muf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
        } else if (packed_cond >> 4 == 0x2) {
                /* 0x20-0x2f: add-ALU condition in bits 3:2, mul-ALU
                 * pushed flags in bits 1:0.
                 */
                cond->ac = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->mpf = packed_cond & 0x3;
        } else if (packed_cond >> 4 == 0x3) {
                /* 0x30-0x3f: mul-ALU condition in bits 3:2, add-ALU
                 * pushed flags in bits 1:0.
                 */
                cond->mc = ((packed_cond >> 2) & 0x3) + V3D_QPU_COND_IFA;
                cond->apf = packed_cond & 0x3;
        } else if (packed_cond >> 6) {
                /* 0x40-0x7f: mul-ALU condition in bits 5:4, plus either
                 * an add-ALU condition (bits 3:2 zero) or an add-ALU flag
                 * update in the low nibble.
                 */
                cond->mc = cond_map[(packed_cond >> 4) & 0x3];
                if (((packed_cond >> 2) & 0x3) == 0) {
                        cond->ac = cond_map[packed_cond & 0x3];
                } else {
                        cond->auf = (packed_cond & 0xf) - 4 + V3D_QPU_UF_ANDZ;
                }
        }

        return true;
}
343 
/* Encodes add/mul ALU condition and flag state into the 7-bit COND field
 * (the inverse of v3d_qpu_flags_unpack).
 *
 * Returns false if the requested combination of conditions/flags has no
 * encoding in the instruction word.
 */
bool
v3d_qpu_flags_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_flags *cond,
                   uint32_t *packed_cond)
{
/* Bitmask of which of the six condition/flag slots are in use. */
#define AC (1 << 0)
#define MC (1 << 1)
#define APF (1 << 2)
#define MPF (1 << 3)
#define AUF (1 << 4)
#define MUF (1 << 5)
        /* Each encodable combination of present slots, with the fixed
         * high bits of its encoding; the variable low bits are OR'd in
         * from the individual values below.
         */
        static const struct {
                uint8_t flags_present;
                uint8_t bits;
        } flags_table[] = {
                { 0,        0 },
                { APF,      0 },
                { AUF,      0 },
                { MPF,      (1 << 4) },
                { MUF,      (1 << 4) },
                { AC,       (1 << 5) },
                { AC | MPF, (1 << 5) },
                { MC,       (1 << 5) | (1 << 4) },
                { MC | APF, (1 << 5) | (1 << 4) },
                { MC | AC,  (1 << 6) },
                { MC | AUF, (1 << 6) },
        };

        /* Summarize which slots the caller actually set. */
        uint8_t flags_present = 0;
        if (cond->ac != V3D_QPU_COND_NONE)
                flags_present |= AC;
        if (cond->mc != V3D_QPU_COND_NONE)
                flags_present |= MC;
        if (cond->apf != V3D_QPU_PF_NONE)
                flags_present |= APF;
        if (cond->mpf != V3D_QPU_PF_NONE)
                flags_present |= MPF;
        if (cond->auf != V3D_QPU_UF_NONE)
                flags_present |= AUF;
        if (cond->muf != V3D_QPU_UF_NONE)
                flags_present |= MUF;

        for (int i = 0; i < ARRAY_SIZE(flags_table); i++) {
                if (flags_table[i].flags_present != flags_present)
                        continue;

                *packed_cond = flags_table[i].bits;

                /* PF_NONE is 0, so OR-ing both is harmless when absent. */
                *packed_cond |= cond->apf;
                *packed_cond |= cond->mpf;

                /* Flag-update encodings start at 4 (see unpack). */
                if (flags_present & AUF)
                        *packed_cond |= cond->auf - V3D_QPU_UF_ANDZ + 4;
                if (flags_present & MUF)
                        *packed_cond |= cond->muf - V3D_QPU_UF_ANDZ + 4;

                /* The condition subfields land at different bit positions
                 * depending on which encoding family (bit 6 set or not)
                 * was chosen above.
                 */
                if (flags_present & AC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= cond->ac - V3D_QPU_COND_IFA;
                        else
                                *packed_cond |= (cond->ac -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                if (flags_present & MC) {
                        if (*packed_cond & (1 << 6))
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 4;
                        else
                                *packed_cond |= (cond->mc -
                                                 V3D_QPU_COND_IFA) << 2;
                }

                return true;
        }

        return false;
}
422 
/* Make a mapping of the table of opcodes in the spec.  The opcode is
 * determined by a combination of the opcode field, and in the case of 0 or
 * 1-arg opcodes, the mux (version <= 42) or raddr (version >= 71) field as
 * well.
 */
/* Single-bit mask selecting one mux/raddr value. */
#define OP_MASK(val) BITFIELD64_BIT(val)
/* Mask covering the contiguous range bot..top, inclusive. */
#define OP_RANGE(bot, top) BITFIELD64_RANGE(bot, top - bot + 1)
/* All 8 mux inputs (<= v4.2). */
#define ANYMUX OP_RANGE(0, 7)
/* All 64 raddr values (>= v7.1). */
#define ANYOPMASK OP_RANGE(0, 63)
432 
/* One row of the opcode tables below: maps a range of opcode-field values
 * (plus a mux/raddr filter for 0/1-arg instructions) to an unpacked op.
 */
struct opcode_desc {
        /* Inclusive range of the instruction's opcode field this row
         * matches.
         */
        uint8_t opcode_first;
        uint8_t opcode_last;

        union {
                /* v4.2 and earlier: bitmask of allowed mux_b/mux_a
                 * values.
                 */
                struct {
                        uint8_t b_mask;
                        uint8_t a_mask;
                } mux;
                /* v7.1+: bitmask of allowed raddr values. */
                uint64_t raddr_mask;
        };

        /* The unpacked opcode (a V3D_QPU_A_* or V3D_QPU_M_* enum value,
         * stored narrow).
         */
        uint8_t op;

        /* first_ver == 0 if it's the same across all V3D versions.
         * first_ver == X, last_ver == 0 if it's the same for all V3D versions
         *   starting from X
         * first_ver == X, last_ver == Y if it's the same for all V3D versions
         *   on the range X through Y
         */
        uint8_t first_ver;
        uint8_t last_ver;
};
456 
/* Add-ALU opcode table for v3d 4.2 and earlier.  Rows are searched
 * linearly; some opcode values deliberately appear in multiple rows
 * (FADD/FADDNF, FMIN/FMAX, the STVPMs) and are disambiguated elsewhere
 * by operand order or the waddr field.
 */
static const struct opcode_desc v3d42_add_ops[] = {
        /* FADD is FADDNF depending on the order of the mux_a/mux_b. */
        { 0,   47,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADD },
        { 0,   47,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FADDNF },
        { 53,  55,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 56,  56,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ADD },
        { 57,  59,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 60,  60,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SUB },
        { 61,  63,  .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFPACK },
        { 64,  111, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FSUB },
        { 120, 120, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MIN },
        { 121, 121, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_MAX },
        { 122, 122, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMIN },
        { 123, 123, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_UMAX },
        { 124, 124, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHL },
        { 125, 125, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_SHR },
        { 126, 126, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ASR },
        { 127, 127, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_ROR },
        /* FMIN is instead FMAX depending on the order of the mux_a/mux_b. */
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMIN },
        { 128, 175, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FMAX },
        { 176, 180, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMIN },

        { 181, 181, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_AND },
        { 182, 182, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_OR },
        { 183, 183, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_XOR },

        { 184, 184, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VADD },
        { 185, 185, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VSUB },
        /* Opcode 186: 1-arg ops selected by mux_b. */
        { 186, 186, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_NOT },
        { 186, 186, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_NEG },
        { 186, 186, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_FLAPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FLBPUSH },
        { 186, 186, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_FLPOP },
        { 186, 186, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_RECIP },
        { 186, 186, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SETMSF },
        { 186, 186, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_SETREVF },
        /* Opcode 187: 0-arg ops selected by mux_b and mux_a. */
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(1), V3D_QPU_A_TIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(2), V3D_QPU_A_EIDX },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(3), V3D_QPU_A_LR },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(4), V3D_QPU_A_VFLA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VFLB },
        { 187, 187, .mux.b_mask = OP_MASK(0), .mux.a_mask = OP_MASK(7), V3D_QPU_A_VFLNB },

        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(0, 2), V3D_QPU_A_FXCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(3), V3D_QPU_A_XCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_RANGE(4, 6), V3D_QPU_A_FYCD },
        { 187, 187, .mux.b_mask = OP_MASK(1), .mux.a_mask = OP_MASK(7), V3D_QPU_A_YCD },

        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(0), V3D_QPU_A_MSF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(1), V3D_QPU_A_REVF },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_VDWWT, 33 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(2), V3D_QPU_A_IID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(3), V3D_QPU_A_SAMPID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(4), V3D_QPU_A_BARRIERID, 40 },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(5), V3D_QPU_A_TMUWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(6), V3D_QPU_A_VPMWT },
        { 187, 187, .mux.b_mask = OP_MASK(2), .mux.a_mask = OP_MASK(7), V3D_QPU_A_FLAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = OP_MASK(0), V3D_QPU_A_FLNAFIRST, 41 },
        { 187, 187, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_VPMSETUP, 33 },

        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(0), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMV_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_IN, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(1), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMD_OUT, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(2), .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMP, 40 },
        { 188, 188, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(4), .mux.a_mask = ANYMUX, V3D_QPU_A_EXP, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(5), .mux.a_mask = ANYMUX, V3D_QPU_A_LOG, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(6), .mux.a_mask = ANYMUX, V3D_QPU_A_SIN, 41 },
        { 188, 188, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_RSQRT2, 41 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_IN, 40 },
        { 189, 189, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_LDVPMG_OUT, 40 },

        /* FIXME: MORE COMPLICATED */
        /* { 190, 191, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMOVABSNEGNAB }, */

        { 192, 239, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_FCMP },
        { 240, 244, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_VFMAX },

        { 245, 245, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FROUND },
        { 245, 245, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIN },
        { 245, 245, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FTRUNC },
        { 245, 245, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOIZ },
        { 246, 246, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FFLOOR },
        { 246, 246, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOUZ },
        { 246, 246, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FCEIL },
        { 246, 246, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_A_FTOC },

        { 247, 247, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_FDX },
        { 247, 247, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_FDY },

        /* The stvpms are distinguished by the waddr field. */
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMV },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMD },
        { 248, 248, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_A_STVPMP },

        { 252, 252, .mux.b_mask = OP_RANGE(0, 2), .mux.a_mask = ANYMUX, V3D_QPU_A_ITOF },
        { 252, 252, .mux.b_mask = OP_MASK(3), .mux.a_mask = ANYMUX, V3D_QPU_A_CLZ },
        { 252, 252, .mux.b_mask = OP_RANGE(4, 6), .mux.a_mask = ANYMUX, V3D_QPU_A_UTOF },
};
560 
561 static const struct opcode_desc v3d42_mul_ops[] = {
562         { 1, 1, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_ADD },
563         { 2, 2, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SUB },
564         { 3, 3, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_UMUL24 },
565         { 4, 8, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_VFMUL },
566         { 9, 9, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_SMUL24 },
567         { 10, 10, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_MULTOP },
568         { 14, 14, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMOV, 33, 42 },
569         { 15, 15, .mux.b_mask = OP_RANGE(0, 3), ANYMUX, V3D_QPU_M_FMOV, 33, 42},
570         { 15, 15, .mux.b_mask = OP_MASK(4), .mux.a_mask = OP_MASK(0), V3D_QPU_M_NOP, 33, 42 },
571         { 15, 15, .mux.b_mask = OP_MASK(7), .mux.a_mask = ANYMUX, V3D_QPU_M_MOV, 33, 42 },
572 
573         { 16, 63, .mux.b_mask = ANYMUX, .mux.a_mask = ANYMUX, V3D_QPU_M_FMUL },
574 };
575 
576 /* Note that it would have been possible to define all the add/mul opcodes in
577  * just one table, using the first_ver/last_ver. But taking into account that
578  * for v3d71 there were a lot of changes, it was more tidy this way. Also
579  * right now we are doing a linear search on those tables, so this maintains
580  * the tables smaller.
581  *
582  * Just in case we merge the tables, we define the first_ver as 71 for those
583  * opcodes that changed on v3d71
584  */
585 static const struct opcode_desc v3d71_add_ops[] = {
586         /* FADD is FADDNF depending on the order of the raddr_a/raddr_b. */
587         { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADD },
588         { 0,   47,  .raddr_mask = ANYOPMASK, V3D_QPU_A_FADDNF },
589         { 53,  55,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
590         { 56,  56,  .raddr_mask = ANYOPMASK, V3D_QPU_A_ADD },
591         { 57,  59,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
592         { 60,  60,  .raddr_mask = ANYOPMASK, V3D_QPU_A_SUB },
593         { 61,  63,  .raddr_mask = ANYOPMASK, V3D_QPU_A_VFPACK },
594         { 64,  111, .raddr_mask = ANYOPMASK, V3D_QPU_A_FSUB },
595         { 120, 120, .raddr_mask = ANYOPMASK, V3D_QPU_A_MIN },
596         { 121, 121, .raddr_mask = ANYOPMASK, V3D_QPU_A_MAX },
597         { 122, 122, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMIN },
598         { 123, 123, .raddr_mask = ANYOPMASK, V3D_QPU_A_UMAX },
599         { 124, 124, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHL },
600         { 125, 125, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHR },
601         { 126, 126, .raddr_mask = ANYOPMASK, V3D_QPU_A_ASR },
602         { 127, 127, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROR },
603         /* FMIN is instead FMAX depending on the raddr_a/b order. */
604         { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMIN },
605         { 128, 175, .raddr_mask = ANYOPMASK, V3D_QPU_A_FMAX },
606         { 176, 180, .raddr_mask = ANYOPMASK, V3D_QPU_A_VFMIN },
607 
608         { 181, 181, .raddr_mask = ANYOPMASK, V3D_QPU_A_AND },
609         { 182, 182, .raddr_mask = ANYOPMASK, V3D_QPU_A_OR },
610         { 183, 183, .raddr_mask = ANYOPMASK, V3D_QPU_A_XOR },
611         { 184, 184, .raddr_mask = ANYOPMASK, V3D_QPU_A_VADD },
612         { 185, 185, .raddr_mask = ANYOPMASK, V3D_QPU_A_VSUB },
613 
614         { 186, 186, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOT },
615         { 186, 186, .raddr_mask = OP_MASK(1), V3D_QPU_A_NEG },
616         { 186, 186, .raddr_mask = OP_MASK(2), V3D_QPU_A_FLAPUSH },
617         { 186, 186, .raddr_mask = OP_MASK(3), V3D_QPU_A_FLBPUSH },
618         { 186, 186, .raddr_mask = OP_MASK(4), V3D_QPU_A_FLPOP },
619         { 186, 186, .raddr_mask = OP_MASK(5), V3D_QPU_A_CLZ },
620         { 186, 186, .raddr_mask = OP_MASK(6), V3D_QPU_A_SETMSF },
621         { 186, 186, .raddr_mask = OP_MASK(7), V3D_QPU_A_SETREVF },
622 
623         { 187, 187, .raddr_mask = OP_MASK(0), V3D_QPU_A_NOP, 0 },
624         { 187, 187, .raddr_mask = OP_MASK(1), V3D_QPU_A_TIDX },
625         { 187, 187, .raddr_mask = OP_MASK(2), V3D_QPU_A_EIDX },
626         { 187, 187, .raddr_mask = OP_MASK(3), V3D_QPU_A_LR },
627         { 187, 187, .raddr_mask = OP_MASK(4), V3D_QPU_A_VFLA },
628         { 187, 187, .raddr_mask = OP_MASK(5), V3D_QPU_A_VFLNA },
629         { 187, 187, .raddr_mask = OP_MASK(6), V3D_QPU_A_VFLB },
630         { 187, 187, .raddr_mask = OP_MASK(7), V3D_QPU_A_VFLNB },
631         { 187, 187, .raddr_mask = OP_MASK(8), V3D_QPU_A_XCD },
632         { 187, 187, .raddr_mask = OP_MASK(9), V3D_QPU_A_YCD },
633         { 187, 187, .raddr_mask = OP_MASK(10), V3D_QPU_A_MSF },
634         { 187, 187, .raddr_mask = OP_MASK(11), V3D_QPU_A_REVF },
635         { 187, 187, .raddr_mask = OP_MASK(12), V3D_QPU_A_IID },
636         { 187, 187, .raddr_mask = OP_MASK(13), V3D_QPU_A_SAMPID },
637         { 187, 187, .raddr_mask = OP_MASK(14), V3D_QPU_A_BARRIERID },
638         { 187, 187, .raddr_mask = OP_MASK(15), V3D_QPU_A_TMUWT },
639         { 187, 187, .raddr_mask = OP_MASK(16), V3D_QPU_A_VPMWT },
640         { 187, 187, .raddr_mask = OP_MASK(17), V3D_QPU_A_FLAFIRST },
641         { 187, 187, .raddr_mask = OP_MASK(18), V3D_QPU_A_FLNAFIRST },
642 
643         { 187, 187, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FXCD },
644         { 187, 187, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FYCD },
645 
646         { 188, 188, .raddr_mask = OP_MASK(0), V3D_QPU_A_LDVPMV_IN, 71 },
647         { 188, 188, .raddr_mask = OP_MASK(1), V3D_QPU_A_LDVPMD_IN, 71 },
648         { 188, 188, .raddr_mask = OP_MASK(2), V3D_QPU_A_LDVPMP, 71 },
649 
650         { 188, 188, .raddr_mask = OP_MASK(32), V3D_QPU_A_RECIP, 71 },
651         { 188, 188, .raddr_mask = OP_MASK(33), V3D_QPU_A_RSQRT, 71 },
652         { 188, 188, .raddr_mask = OP_MASK(34), V3D_QPU_A_EXP, 71 },
653         { 188, 188, .raddr_mask = OP_MASK(35), V3D_QPU_A_LOG, 71 },
654         { 188, 188, .raddr_mask = OP_MASK(36), V3D_QPU_A_SIN, 71 },
655         { 188, 188, .raddr_mask = OP_MASK(37), V3D_QPU_A_RSQRT2, 71 },
656         { 188, 188, .raddr_mask = OP_MASK(38), V3D_QPU_A_BALLOT, 71 },
657         { 188, 188, .raddr_mask = OP_MASK(39), V3D_QPU_A_BCASTF, 71 },
658         { 188, 188, .raddr_mask = OP_MASK(40), V3D_QPU_A_ALLEQ, 71 },
659         { 188, 188, .raddr_mask = OP_MASK(41), V3D_QPU_A_ALLFEQ, 71 },
660 
661         { 189, 189, .raddr_mask = ANYOPMASK, V3D_QPU_A_LDVPMG_IN, 71 },
662 
663         /* The stvpms are distinguished by the waddr field. */
664         { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMV, 71},
665         { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMD, 71},
666         { 190, 190, .raddr_mask = ANYOPMASK, V3D_QPU_A_STVPMP, 71},
667 
668         { 192, 207, .raddr_mask = ANYOPMASK, V3D_QPU_A_FCMP, 71 },
669 
670         { 245, 245, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FROUND, 71 },
671         { 245, 245, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FROUND, 71 },
672         { 245, 245, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FROUND, 71 },
673         { 245, 245, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FROUND, 71 },
674 
675         { 245, 245, .raddr_mask = OP_MASK(3),  V3D_QPU_A_FTOIN, 71 },
676         { 245, 245, .raddr_mask = OP_MASK(7),  V3D_QPU_A_FTOIN, 71 },
677         { 245, 245, .raddr_mask = OP_MASK(11), V3D_QPU_A_FTOIN, 71 },
678         { 245, 245, .raddr_mask = OP_MASK(15), V3D_QPU_A_FTOIN, 71 },
679 
680         { 245, 245, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FTRUNC, 71 },
681         { 245, 245, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FTRUNC, 71 },
682         { 245, 245, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FTRUNC, 71 },
683         { 245, 245, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FTRUNC, 71 },
684 
685         { 245, 245, .raddr_mask = OP_MASK(19), V3D_QPU_A_FTOIZ, 71 },
686         { 245, 245, .raddr_mask = OP_MASK(23), V3D_QPU_A_FTOIZ, 71 },
687         { 245, 245, .raddr_mask = OP_MASK(27), V3D_QPU_A_FTOIZ, 71 },
688         { 245, 245, .raddr_mask = OP_MASK(31), V3D_QPU_A_FTOIZ, 71 },
689 
690         { 245, 245, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_FFLOOR, 71 },
691         { 245, 245, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_FFLOOR, 71 },
692         { 245, 245, .raddr_mask = OP_RANGE(40, 42), V3D_QPU_A_FFLOOR, 71 },
693         { 245, 245, .raddr_mask = OP_RANGE(44, 46), V3D_QPU_A_FFLOOR, 71 },
694 
695         { 245, 245, .raddr_mask = OP_MASK(35), V3D_QPU_A_FTOUZ, 71 },
696         { 245, 245, .raddr_mask = OP_MASK(39), V3D_QPU_A_FTOUZ, 71 },
697         { 245, 245, .raddr_mask = OP_MASK(43), V3D_QPU_A_FTOUZ, 71 },
698         { 245, 245, .raddr_mask = OP_MASK(47), V3D_QPU_A_FTOUZ, 71 },
699 
700         { 245, 245, .raddr_mask = OP_RANGE(48, 50), V3D_QPU_A_FCEIL, 71 },
701         { 245, 245, .raddr_mask = OP_RANGE(52, 54), V3D_QPU_A_FCEIL, 71 },
702         { 245, 245, .raddr_mask = OP_RANGE(56, 58), V3D_QPU_A_FCEIL, 71 },
703         { 245, 245, .raddr_mask = OP_RANGE(60, 62), V3D_QPU_A_FCEIL, 71 },
704 
705         { 245, 245, .raddr_mask = OP_MASK(51), V3D_QPU_A_FTOC },
706         { 245, 245, .raddr_mask = OP_MASK(55), V3D_QPU_A_FTOC },
707         { 245, 245, .raddr_mask = OP_MASK(59), V3D_QPU_A_FTOC },
708         { 245, 245, .raddr_mask = OP_MASK(63), V3D_QPU_A_FTOC },
709 
710         { 246, 246, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FDX, 71 },
711         { 246, 246, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FDX, 71 },
712         { 246, 246, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FDX, 71 },
713         { 246, 246, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FDX, 71 },
714         { 246, 246, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FDY, 71 },
715         { 246, 246, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FDY, 71 },
716         { 246, 246, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FDY, 71 },
717         { 246, 246, .raddr_mask = OP_RANGE(28, 30), V3D_QPU_A_FDY, 71 },
718 
719         { 246, 246, .raddr_mask = OP_RANGE(32, 34), V3D_QPU_A_ITOF, 71 },
720         { 246, 246, .raddr_mask = OP_RANGE(36, 38), V3D_QPU_A_UTOF, 71 },
721 
722         { 247, 247, .raddr_mask = ANYOPMASK, V3D_QPU_A_VPACK, 71 },
723         { 248, 248, .raddr_mask = ANYOPMASK, V3D_QPU_A_V8PACK, 71 },
724 
725         { 249, 249, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_A_FMOV, 71 },
726         { 249, 249, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_A_FMOV, 71 },
727         { 249, 249, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_A_FMOV, 71 },
728         { 249, 249, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_A_FMOV, 71 },
729         { 249, 249, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_A_FMOV, 71 },
730         { 249, 249, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_A_FMOV, 71 },
731         { 249, 249, .raddr_mask = OP_RANGE(24, 26), V3D_QPU_A_FMOV, 71 },
732 
733         { 249, 249, .raddr_mask = OP_MASK(3),  V3D_QPU_A_MOV, 71 },
734         { 249, 249, .raddr_mask = OP_MASK(7),  V3D_QPU_A_MOV, 71 },
735         { 249, 249, .raddr_mask = OP_MASK(11), V3D_QPU_A_MOV, 71 },
736         { 249, 249, .raddr_mask = OP_MASK(15), V3D_QPU_A_MOV, 71 },
737         { 249, 249, .raddr_mask = OP_MASK(19), V3D_QPU_A_MOV, 71 },
738 
739         { 250, 250, .raddr_mask = ANYOPMASK, V3D_QPU_A_V10PACK, 71 },
740         { 251, 251, .raddr_mask = ANYOPMASK, V3D_QPU_A_V11FPACK, 71 },
741 
742         { 252, 252, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROTQ, 71 },
743         { 253, 253, .raddr_mask = ANYOPMASK, V3D_QPU_A_ROT, 71 },
744         { 254, 254, .raddr_mask = ANYOPMASK, V3D_QPU_A_SHUFFLE, 71 },
745 };
746 
747 static const struct opcode_desc v3d71_mul_ops[] = {
748         /* For V3D 7.1, second mask field would be ignored */
749         { 1, 1, .raddr_mask = ANYOPMASK, V3D_QPU_M_ADD, 71 },
750         { 2, 2, .raddr_mask = ANYOPMASK, V3D_QPU_M_SUB, 71 },
751         { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
752         { 3, 3, .raddr_mask = ANYOPMASK, V3D_QPU_M_UMUL24, 71 },
753         { 4, 8, .raddr_mask = ANYOPMASK, V3D_QPU_M_VFMUL, 71 },
754         { 9, 9, .raddr_mask = ANYOPMASK, V3D_QPU_M_SMUL24, 71 },
755         { 10, 10, .raddr_mask = ANYOPMASK, V3D_QPU_M_MULTOP, 71 },
756 
757         { 14, 14, .raddr_mask = OP_RANGE(0, 2),   V3D_QPU_M_FMOV, 71 },
758         { 14, 14, .raddr_mask = OP_RANGE(4, 6),   V3D_QPU_M_FMOV, 71 },
759         { 14, 14, .raddr_mask = OP_RANGE(8, 10),  V3D_QPU_M_FMOV, 71 },
760         { 14, 14, .raddr_mask = OP_RANGE(12, 14), V3D_QPU_M_FMOV, 71 },
761         { 14, 14, .raddr_mask = OP_RANGE(16, 18), V3D_QPU_M_FMOV, 71 },
762         { 14, 14, .raddr_mask = OP_RANGE(20, 22), V3D_QPU_M_FMOV, 71 },
763 
764         { 14, 14, .raddr_mask = OP_MASK(3),  V3D_QPU_M_MOV, 71 },
765         { 14, 14, .raddr_mask = OP_MASK(7),  V3D_QPU_M_MOV, 71 },
766         { 14, 14, .raddr_mask = OP_MASK(11), V3D_QPU_M_MOV, 71 },
767         { 14, 14, .raddr_mask = OP_MASK(15), V3D_QPU_M_MOV, 71 },
768         { 14, 14, .raddr_mask = OP_MASK(19), V3D_QPU_M_MOV, 71 },
769 
770         { 14, 14, .raddr_mask = OP_MASK(32), V3D_QPU_M_FTOUNORM16, 71 },
771         { 14, 14, .raddr_mask = OP_MASK(33), V3D_QPU_M_FTOSNORM16, 71 },
772         { 14, 14, .raddr_mask = OP_MASK(34), V3D_QPU_M_VFTOUNORM8, 71 },
773         { 14, 14, .raddr_mask = OP_MASK(35), V3D_QPU_M_VFTOSNORM8, 71 },
774         { 14, 14, .raddr_mask = OP_MASK(48), V3D_QPU_M_VFTOUNORM10LO, 71 },
775         { 14, 14, .raddr_mask = OP_MASK(49), V3D_QPU_M_VFTOUNORM10HI, 71 },
776 
777         { 14, 14, .raddr_mask = OP_MASK(63), V3D_QPU_M_NOP, 71 },
778 
779         { 16, 63, .raddr_mask = ANYOPMASK, V3D_QPU_M_FMUL },
780 };
781 
782 /* Returns true if op_desc should be filtered out based on devinfo->ver
783  * against op_desc->first_ver and op_desc->last_ver. Check notes about
784  * first_ver/last_ver on struct opcode_desc comments.
785  */
786 static bool
opcode_invalid_in_version(const struct v3d_device_info * devinfo,const uint8_t first_ver,const uint8_t last_ver)787 opcode_invalid_in_version(const struct v3d_device_info *devinfo,
788                           const uint8_t first_ver,
789                           const uint8_t last_ver)
790 {
791         return (first_ver != 0 && devinfo->ver < first_ver) ||
792                 (last_ver != 0  && devinfo->ver > last_ver);
793 }
794 
/* Note that we pass mux_a, mux_b and raddr as parameters even though,
 * depending on devinfo->ver, some of them will be ignored. We do it this
 * way just to avoid having two really similar lookup_opcode methods.
 */
799 static const struct opcode_desc *
lookup_opcode_from_packed(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint32_t opcode,uint32_t mux_a,uint32_t mux_b,uint32_t raddr)800 lookup_opcode_from_packed(const struct v3d_device_info *devinfo,
801                           const struct opcode_desc *opcodes,
802                           size_t num_opcodes, uint32_t opcode,
803                           uint32_t mux_a, uint32_t mux_b,
804                           uint32_t raddr)
805 {
806         for (int i = 0; i < num_opcodes; i++) {
807                 const struct opcode_desc *op_desc = &opcodes[i];
808 
809                 if (opcode < op_desc->opcode_first ||
810                     opcode > op_desc->opcode_last)
811                         continue;
812 
813                 if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
814                         continue;
815 
816                 if (devinfo->ver < 71) {
817                         if (!(op_desc->mux.b_mask & (1 << mux_b)))
818                                 continue;
819 
820                         if (!(op_desc->mux.a_mask & (1 << mux_a)))
821                                 continue;
822                 } else {
823                         if (!(op_desc->raddr_mask & ((uint64_t) 1 << raddr)))
824                                 continue;
825                 }
826 
827                 return op_desc;
828         }
829 
830         return NULL;
831 }
832 
833 static bool
v3d_qpu_float32_unpack_unpack(const struct v3d_device_info * devinfo,uint32_t packed,enum v3d_qpu_input_unpack * unpacked)834 v3d_qpu_float32_unpack_unpack(const struct v3d_device_info *devinfo,
835                               uint32_t packed,
836                               enum v3d_qpu_input_unpack *unpacked)
837 {
838         switch (packed) {
839         case 0:
840                 *unpacked = V3D_QPU_UNPACK_ABS;
841                 return true;
842         case 1:
843                 *unpacked = V3D_QPU_UNPACK_NONE;
844                 return true;
845         case 2:
846                 *unpacked = V3D_QPU_UNPACK_L;
847                 return true;
848         case 3:
849                 *unpacked = V3D_QPU_UNPACK_H;
850                 return true;
851         case 4:
852                 *unpacked = V3D71_QPU_UNPACK_SAT;
853                 return devinfo->ver >= 71;
854         case 5:
855                 *unpacked = V3D71_QPU_UNPACK_NSAT;
856                 return devinfo->ver >= 71;
857         case 6:
858                 *unpacked = V3D71_QPU_UNPACK_MAX0;
859                 return devinfo->ver >= 71;
860         default:
861                 return false;
862         }
863 }
864 
865 static bool
v3d_qpu_float32_unpack_pack(const struct v3d_device_info * devinfo,enum v3d_qpu_input_unpack unpacked,uint32_t * packed)866 v3d_qpu_float32_unpack_pack(const struct v3d_device_info *devinfo,
867                             enum v3d_qpu_input_unpack unpacked,
868                             uint32_t *packed)
869 {
870         switch (unpacked) {
871         case V3D_QPU_UNPACK_ABS:
872                 *packed = 0;
873                 return true;
874         case V3D_QPU_UNPACK_NONE:
875                 *packed = 1;
876                 return true;
877         case V3D_QPU_UNPACK_L:
878                 *packed = 2;
879                 return true;
880         case V3D_QPU_UNPACK_H:
881                 *packed = 3;
882                 return true;
883         case V3D71_QPU_UNPACK_SAT:
884                 *packed = 4;
885                 return devinfo->ver >= 71;
886         case V3D71_QPU_UNPACK_NSAT:
887                 *packed = 5;
888                 return devinfo->ver >= 71;
889         case V3D71_QPU_UNPACK_MAX0:
890                 *packed = 6;
891                 return devinfo->ver >= 71;
892         default:
893                 return false;
894         }
895 }
896 
897 static bool
v3d_qpu_int32_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)898 v3d_qpu_int32_unpack_unpack(uint32_t packed,
899                             enum v3d_qpu_input_unpack *unpacked)
900 {
901         switch (packed) {
902         case 0:
903                 *unpacked = V3D_QPU_UNPACK_NONE;
904                 return true;
905         case 1:
906                 *unpacked = V3D_QPU_UNPACK_UL;
907                 return true;
908         case 2:
909                 *unpacked = V3D_QPU_UNPACK_UH;
910                 return true;
911         case 3:
912                 *unpacked = V3D_QPU_UNPACK_IL;
913                 return true;
914         case 4:
915                 *unpacked = V3D_QPU_UNPACK_IH;
916                 return true;
917         default:
918                 return false;
919         }
920 }
921 
922 static bool
v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)923 v3d_qpu_int32_unpack_pack(enum v3d_qpu_input_unpack unpacked,
924                           uint32_t *packed)
925 {
926         switch (unpacked) {
927         case V3D_QPU_UNPACK_NONE:
928                 *packed = 0;
929                 return true;
930         case V3D_QPU_UNPACK_UL:
931                 *packed = 1;
932                 return true;
933         case V3D_QPU_UNPACK_UH:
934                 *packed = 2;
935                 return true;
936         case V3D_QPU_UNPACK_IL:
937                 *packed = 3;
938                 return true;
939         case V3D_QPU_UNPACK_IH:
940                 *packed = 4;
941                 return true;
942         default:
943                 return false;
944         }
945 }
946 
947 static bool
v3d_qpu_float16_unpack_unpack(uint32_t packed,enum v3d_qpu_input_unpack * unpacked)948 v3d_qpu_float16_unpack_unpack(uint32_t packed,
949                               enum v3d_qpu_input_unpack *unpacked)
950 {
951         switch (packed) {
952         case 0:
953                 *unpacked = V3D_QPU_UNPACK_NONE;
954                 return true;
955         case 1:
956                 *unpacked = V3D_QPU_UNPACK_REPLICATE_32F_16;
957                 return true;
958         case 2:
959                 *unpacked = V3D_QPU_UNPACK_REPLICATE_L_16;
960                 return true;
961         case 3:
962                 *unpacked = V3D_QPU_UNPACK_REPLICATE_H_16;
963                 return true;
964         case 4:
965                 *unpacked = V3D_QPU_UNPACK_SWAP_16;
966                 return true;
967         default:
968                 return false;
969         }
970 }
971 
972 static bool
v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,uint32_t * packed)973 v3d_qpu_float16_unpack_pack(enum v3d_qpu_input_unpack unpacked,
974                             uint32_t *packed)
975 {
976         switch (unpacked) {
977         case V3D_QPU_UNPACK_NONE:
978                 *packed = 0;
979                 return true;
980         case V3D_QPU_UNPACK_REPLICATE_32F_16:
981                 *packed = 1;
982                 return true;
983         case V3D_QPU_UNPACK_REPLICATE_L_16:
984                 *packed = 2;
985                 return true;
986         case V3D_QPU_UNPACK_REPLICATE_H_16:
987                 *packed = 3;
988                 return true;
989         case V3D_QPU_UNPACK_SWAP_16:
990                 *packed = 4;
991                 return true;
992         default:
993                 return false;
994         }
995 }
996 
997 static bool
v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,uint32_t * packed)998 v3d_qpu_float32_pack_pack(enum v3d_qpu_output_pack pack,
999                           uint32_t *packed)
1000 {
1001         switch (pack) {
1002         case V3D_QPU_PACK_NONE:
1003                 *packed = 0;
1004                 return true;
1005         case V3D_QPU_PACK_L:
1006                 *packed = 1;
1007                 return true;
1008         case V3D_QPU_PACK_H:
1009                 *packed = 2;
1010                 return true;
1011         default:
1012                 return false;
1013         }
1014 }
1015 
/* Decodes the add-ALU portion of a packed instruction for pre-7.1 devices,
 * where operands are encoded as 3-bit mux fields. Fills in instr->alu.add
 * and returns false if the encoding is not a valid add op.
 */
static bool
v3d42_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_ADD_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        uint32_t map_op = op;
        /* Some big clusters of opcodes are replicated with unpack
         * flags
         */
        if (map_op >= 249 && map_op <= 251)
                map_op = (map_op - 249 + 245);
        if (map_op >= 253 && map_op <= 255)
                map_op = (map_op - 253 + 245);

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo, v3d42_add_ops,
                                          ARRAY_SIZE(v3d42_add_ops),
                                          map_op, mux_a, mux_b, 0);

        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the orders of the
         * operands.
         */
        if (((op >> 2) & 3) * 8 + mux_a > (op & 3) * 8 + mux_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                /* The three STVPM variants share an opcode and are
                 * distinguished by the waddr field (see the table comment).
                 */
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        /* Decode the per-op pack/unpack modifiers carried in the low bits
         * of the opcode (or in mux_b for single-source float ops).
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                /* Two-source float ops: op[5:4] is output pack (except
                 * VFPACK), op[3:2] and op[1:0] are the a/b input unpacks.
                 */
                if (instr->alu.add.op != V3D_QPU_A_VFPACK)
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                else
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.add.b.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                /* Single-source float ops: output pack lives in mux_b,
                 * input unpack in op[3:2].
                 */
                instr->alu.add.output_pack = mux_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-int conversions: no output pack, input unpack
                 * in op[3:2].
                 */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* Vector float16 ops: op[2:0] is the f16 input unpack. */
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a.mux = mux_a;
        instr->alu.add.b.mux = mux_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        /* The MA bit normally selects a magic waddr write, but for the
         * LDVPM* ops it instead selects the _OUT variant of the op.
         */
        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
1171 
/* Decodes the add-ALU portion of a packed instruction for V3D 7.1+, where
 * operands are encoded as raddr_a/raddr_b register-file addresses instead
 * of the pre-7.1 mux fields. Fills in instr->alu.add and returns false if
 * the encoding is not a valid add op.
 *
 * NOTE(review): unlike the 4.2 path, no opcode re-mapping of the
 * replicated 249-255 clusters is done here (map_op == op) — presumably
 * the 7.1 table lists those opcodes directly; verify against
 * v3d71_add_ops.
 */
static bool
v3d71_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_ADD);
        uint32_t raddr_a = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_A);
        uint32_t raddr_b = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_B);
        uint32_t waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);
        uint32_t map_op = op;

        const struct opcode_desc *desc =
                lookup_opcode_from_packed(devinfo,
                                          v3d71_add_ops,
                                          ARRAY_SIZE(v3d71_add_ops),
                                          map_op, 0, 0,
                                          raddr_b);
        if (!desc)
                return false;

        instr->alu.add.op = desc->op;

        /* FADD/FADDNF and FMIN/FMAX are determined by the order of the
         * operands. The small-immediate flags take part in the ordering
         * comparison alongside the unpack bits and raddrs.
         */
        if (instr->sig.small_imm_a * 256 + ((op >> 2) & 3) * 64 + raddr_a >
            instr->sig.small_imm_b * 256 + (op & 3) * 64 + raddr_b) {
                if (instr->alu.add.op == V3D_QPU_A_FMIN)
                        instr->alu.add.op = V3D_QPU_A_FMAX;
                if (instr->alu.add.op == V3D_QPU_A_FADD)
                        instr->alu.add.op = V3D_QPU_A_FADDNF;
        }

        /* Some QPU ops require a bit more than just basic opcode and mux a/b
         * comparisons to distinguish them.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
        case V3D_QPU_A_STVPMD:
        case V3D_QPU_A_STVPMP:
                /* The three STVPM variants share an opcode and are
                 * distinguished by the waddr field (see the table comment).
                 */
                switch (waddr) {
                case 0:
                        instr->alu.add.op = V3D_QPU_A_STVPMV;
                        break;
                case 1:
                        instr->alu.add.op = V3D_QPU_A_STVPMD;
                        break;
                case 2:
                        instr->alu.add.op = V3D_QPU_A_STVPMP;
                        break;
                default:
                        return false;
                }
                break;
        default:
                break;
        }

        /* Decode the per-op pack/unpack modifiers, carried either in the
         * low opcode bits or in raddr_b depending on the op.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP:
        case V3D_QPU_A_VFPACK:
                /* Two-source float ops: op[5:4] is output pack (not for
                 * VFPACK/FCMP), op[3:2] and op[1:0] are the input unpacks.
                 */
                if (instr->alu.add.op != V3D_QPU_A_VFPACK &&
                    instr->alu.add.op != V3D_QPU_A_FCMP) {
                        instr->alu.add.output_pack = (op >> 4) & 0x3;
                } else {
                        instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.add.b.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY:
                /* Single-source float ops: output pack in raddr_b[1:0],
                 * input unpack in op[3:2].
                 */
                instr->alu.add.output_pack = raddr_b & 0x3;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-int conversions: no output pack, input unpack
                 * in raddr_b[3:2].
                 */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (raddr_b >> 2) & 0x3,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                /* The V3D 7.1 encoding for these vector float16 ops has
                 * not been wired up yet.
                 */
                unreachable("pending v3d71 update");
                if (!v3d_qpu_float16_unpack_unpack(op & 0x7,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }

                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;

        case V3D_QPU_A_MOV:
                /* MOV: integer input unpack in raddr_b[4:2]. */
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_int32_unpack_unpack((raddr_b >> 2) & 0x7,
                                                 &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        case V3D_QPU_A_FMOV:
                /* FMOV: output pack in raddr_b[1:0], float input unpack
                 * in raddr_b[4:2].
                 */
                instr->alu.add.output_pack = raddr_b & 0x3;

                /* Mul alu FMOV has one additional variant */
                int32_t unpack = (raddr_b >> 2) & 0x7;
                if (unpack == 7)
                        return false;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, unpack,
                                                   &instr->alu.add.a.unpack)) {
                        return false;
                }
                break;

        default:
                instr->alu.add.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.add.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.add.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.add.a.raddr = raddr_a;
        instr->alu.add.b.raddr = raddr_b;
        instr->alu.add.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_A);

        /* The MA bit normally selects a magic waddr write, but for the
         * LDVPM* ops it instead selects the _OUT variant of the op.
         */
        instr->alu.add.magic_write = false;
        if (packed_inst & V3D_QPU_MA) {
                switch (instr->alu.add.op) {
                case V3D_QPU_A_LDVPMV_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMV_OUT;
                        break;
                case V3D_QPU_A_LDVPMD_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMD_OUT;
                        break;
                case V3D_QPU_A_LDVPMG_IN:
                        instr->alu.add.op = V3D_QPU_A_LDVPMG_OUT;
                        break;
                default:
                        instr->alu.add.magic_write = true;
                        break;
                }
        }

        return true;
}
1347 
1348 static bool
v3d_qpu_add_unpack(const struct v3d_device_info * devinfo,uint64_t packed_inst,struct v3d_qpu_instr * instr)1349 v3d_qpu_add_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1350                    struct v3d_qpu_instr *instr)
1351 {
1352         if (devinfo->ver >= 71)
1353                 return v3d71_qpu_add_unpack(devinfo, packed_inst, instr);
1354         else
1355                 return v3d42_qpu_add_unpack(devinfo, packed_inst, instr);
1356 }
1357 
/* Decodes the mul-ALU half of a packed v3d 4.2 instruction into
 * instr->alu.mul.  Returns false if the encoding isn't recognized.
 */
static bool
v3d42_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t mux_a = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_A);
        uint32_t mux_b = QPU_GET_FIELD(packed_inst, V3D_QPU_MUL_B);

        {
                /* The opcode field alone doesn't identify the operation: for
                 * some ops the mux values participate in the encoding, so the
                 * lookup takes them as well.
                 */
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo,
                                                  v3d42_mul_ops,
                                                  ARRAY_SIZE(v3d42_mul_ops),
                                                  op, mux_a, mux_b, 0);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* Opcode bits 5:4 hold the output pack mode, stored +1 (the
                 * pack side adds the bias via desc->opcode_first).
                 */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                /* Opcode bits 3:2 and 1:0 hold the a/b f32 input unpacks. */
                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.mul.b.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* Output pack is split across the encoding: high bit in
                 * opcode bit 0, low bit in mux_b bit 2.
                 */
                instr->alu.mul.output_pack = (((op & 1) << 1) +
                                              ((mux_b >> 2) & 1));

                /* Source unpack lives in mux_b bits 1:0. */
                if (!v3d_qpu_float32_unpack_unpack(devinfo, mux_b & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                /* The f16 unpack is stored biased by +4 in the low opcode
                 * bits; undo the bias modulo 8 (inverse of the pack side's
                 * "(packed + 4) & 7").
                 */
                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;

                break;

        default:
                /* Remaining ops encode no pack/unpack modes. */
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a.mux = mux_a;
        instr->alu.mul.b.mux = mux_b;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}
1431 
/* Decodes the mul-ALU half of a packed v3d 7.1 instruction into
 * instr->alu.mul.  On 7.1 the mul sources are register addresses
 * (raddr_c/raddr_d) rather than the 4.2 muxes.  Returns false if the
 * encoding isn't recognized.
 */
static bool
v3d71_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
                     struct v3d_qpu_instr *instr)
{
        uint32_t op = QPU_GET_FIELD(packed_inst, V3D_QPU_OP_MUL);
        uint32_t raddr_c = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_C);
        uint32_t raddr_d = QPU_GET_FIELD(packed_inst, V3D_QPU_RADDR_D);

        {
                /* raddr_d participates in identifying the operation for ops
                 * that don't use a second source.
                 */
                const struct opcode_desc *desc =
                        lookup_opcode_from_packed(devinfo,
                                                  v3d71_mul_ops,
                                                  ARRAY_SIZE(v3d71_mul_ops),
                                                  op, 0, 0,
                                                  raddr_d);
                if (!desc)
                        return false;

                instr->alu.mul.op = desc->op;
        }

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL:
                /* Opcode bits 5:4 hold the output pack mode, stored +1 (the
                 * bias comes from desc->opcode_first on the pack side).
                 */
                instr->alu.mul.output_pack = ((op >> 4) & 0x3) - 1;

                /* Opcode bits 3:2 and 1:0 hold the a/b f32 input unpacks. */
                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 2) & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (op >> 0) & 0x3,
                                                   &instr->alu.mul.b.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_FMOV:
                /* For FMOV the pack/unpack modes live in raddr_d:
                 * bits 1:0 output pack, bits 3:2 input unpack.
                 */
                instr->alu.mul.output_pack = raddr_d & 0x3;

                if (!v3d_qpu_float32_unpack_unpack(devinfo, (raddr_d >> 2) & 0x3,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                break;

        case V3D_QPU_M_VFMUL:
                /* Placeholder: the v3d 7.1 VFMUL encoding hasn't been wired
                 * up yet; the code below is the (dead) 4.2-style decode.
                 */
                unreachable("pending v3d71 update");
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                if (!v3d_qpu_float16_unpack_unpack(((op & 0x7) - 4) & 7,
                                                   &instr->alu.mul.a.unpack)) {
                        return false;
                }

                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;

                break;

        case V3D_QPU_M_MOV:
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;

                /* Integer unpack mode in raddr_d bits 4:2. */
                if (!v3d_qpu_int32_unpack_unpack((raddr_d >> 2) & 0x7,
                                                 &instr->alu.mul.a.unpack)) {
                        return false;
                }
                break;

        default:
                /* Remaining ops encode no pack/unpack modes. */
                instr->alu.mul.output_pack = V3D_QPU_PACK_NONE;
                instr->alu.mul.a.unpack = V3D_QPU_UNPACK_NONE;
                instr->alu.mul.b.unpack = V3D_QPU_UNPACK_NONE;
                break;
        }

        instr->alu.mul.a.raddr = raddr_c;
        instr->alu.mul.b.raddr = raddr_d;
        instr->alu.mul.waddr = QPU_GET_FIELD(packed_inst, V3D_QPU_WADDR_M);
        instr->alu.mul.magic_write = packed_inst & V3D_QPU_MM;

        return true;
}
1515 
1516 static bool
v3d_qpu_mul_unpack(const struct v3d_device_info * devinfo,uint64_t packed_inst,struct v3d_qpu_instr * instr)1517 v3d_qpu_mul_unpack(const struct v3d_device_info *devinfo, uint64_t packed_inst,
1518                    struct v3d_qpu_instr *instr)
1519 {
1520         if (devinfo->ver >= 71)
1521                 return v3d71_qpu_mul_unpack(devinfo, packed_inst, instr);
1522         else
1523                 return v3d42_qpu_mul_unpack(devinfo, packed_inst, instr);
1524 }
1525 
1526 static const struct opcode_desc *
lookup_opcode_from_instr(const struct v3d_device_info * devinfo,const struct opcode_desc * opcodes,size_t num_opcodes,uint8_t op)1527 lookup_opcode_from_instr(const struct v3d_device_info *devinfo,
1528                          const struct opcode_desc *opcodes, size_t num_opcodes,
1529                          uint8_t op)
1530 {
1531         for (int i = 0; i < num_opcodes; i++) {
1532                 const struct opcode_desc *op_desc = &opcodes[i];
1533 
1534                 if (op_desc->op != op)
1535                         continue;
1536 
1537                 if (opcode_invalid_in_version(devinfo, op_desc->first_ver, op_desc->last_ver))
1538                         continue;
1539 
1540                 return op_desc;
1541         }
1542 
1543         return NULL;
1544 }
1545 
/* Packs the add-ALU half of *instr into *packed_instr for v3d 4.2.
 * Returns false if the requested op/pack/unpack combination has no
 * encoding.  May also modify mux/waddr/MA bits already ORed into
 * *packed_instr by the caller's flow.
 */
static bool
v3d42_qpu_add_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t mux_a = instr->alu.add.a.mux;
        uint32_t mux_b = instr->alu.add.b.mux;
        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, v3d42_add_ops,
                                         ARRAY_SIZE(v3d42_add_ops),
                                         instr->alu.add.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its mux values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux.b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux.a_mask) - 1;

        bool no_magic_write = false;

        /* First switch: ops whose waddr/MA bits are part of the encoding
         * rather than a real write destination.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                /* VPM stores encode which store variant in waddr. */
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                /* The _OUT variants are distinguished from _IN by the MA
                 * (magic) bit, even though no magic write happens.
                 */
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        /* Second switch: encode pack/unpack modes per-op. */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &output_pack)) {
                        return false;
                }
                /* Output pack in opcode bits 5:4, input unpacks in 3:2/1:0. */
                opcode |= output_pack << 4;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by which order their operands come in.
                 * (FMIN/FMAX and FADD/FADDNF pair up; if the operands are in
                 * the "wrong" order for the requested op, swap them and rely
                 * on commutativity.)
                 */
                bool ordering = a_unpack * 8 + mux_a > b_unpack * 8 + mux_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = mux_a;
                        mux_a = mux_b;
                        mux_b = temp;
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* VFPACK can't encode ABS unpack. */
                if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Clear-then-set: opcode_first may already have bits here. */
                opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Single-source float ops: output pack rides in mux_b,
                 * input unpack in opcode bits 3:2.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                mux_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                /* Unpack value 0 is not encodable for these ops. */
                if (packed == 0)
                        return false;
                opcode = (opcode & ~(0x3 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-int conversions have no output pack. */
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                opcode |= packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                /* f16 unpack mode is folded directly into the opcode. */
                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        default:
                /* Ops with no pack/unpack support must not request any. */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_ADD_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_ADD_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
1765 
/* Packs the add-ALU half of *instr into *packed_instr for v3d 7.1.
 * Differs from the 4.2 path in that sources are raddr_a/raddr_b register
 * addresses and small-immediate signals may need swapping along with the
 * operands.  Returns false if the combination has no encoding.
 */
static bool
v3d71_qpu_add_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t waddr = instr->alu.add.waddr;
        uint32_t raddr_a = instr->alu.add.a.raddr;
        uint32_t raddr_b = instr->alu.add.b.raddr;

        int nsrc = v3d_qpu_add_op_num_src(instr->alu.add.op);
        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, v3d71_add_ops,
                                         ARRAY_SIZE(v3d71_add_ops),
                                         instr->alu.add.op);
        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* If an operation doesn't use an arg, its raddr values may be used to
         * identify the operation type.
         */
        if (nsrc < 2)
                raddr_b = ffsll(desc->raddr_mask) - 1;

        bool no_magic_write = false;

        /* First switch: ops whose waddr/MA bits are part of the encoding
         * rather than a real write destination.
         */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_STVPMV:
                waddr = 0;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMD:
                waddr = 1;
                no_magic_write = true;
                break;
        case V3D_QPU_A_STVPMP:
                waddr = 2;
                no_magic_write = true;
                break;

        case V3D_QPU_A_LDVPMV_IN:
        case V3D_QPU_A_LDVPMD_IN:
        case V3D_QPU_A_LDVPMP:
        case V3D_QPU_A_LDVPMG_IN:
                assert(!instr->alu.add.magic_write);
                break;

        case V3D_QPU_A_LDVPMV_OUT:
        case V3D_QPU_A_LDVPMD_OUT:
        case V3D_QPU_A_LDVPMG_OUT:
                /* MA bit distinguishes _OUT from _IN; no magic write. */
                assert(!instr->alu.add.magic_write);
                *packed_instr |= V3D_QPU_MA;
                break;

        default:
                break;
        }

        /* Second switch: encode pack/unpack modes per-op. */
        switch (instr->alu.add.op) {
        case V3D_QPU_A_FADD:
        case V3D_QPU_A_FADDNF:
        case V3D_QPU_A_FSUB:
        case V3D_QPU_A_FMIN:
        case V3D_QPU_A_FMAX:
        case V3D_QPU_A_FCMP: {
                uint32_t output_pack;
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* FCMP has no output-pack field on 7.1. */
                if (instr->alu.add.op != V3D_QPU_A_FCMP) {
                        if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                                       &output_pack)) {
                                return false;
                        }
                        opcode |= output_pack << 4;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* These operations with commutative operands are
                 * distinguished by the order of the operands come in.
                 * (FMIN/FMAX and FADD/FADDNF pair up; the small-imm signal
                 * is part of the ordering key because it changes what the
                 * raddr means.)
                 */
                bool ordering =
                        instr->sig.small_imm_a * 256 + a_unpack * 64 + raddr_a >
                        instr->sig.small_imm_b * 256 + b_unpack * 64 + raddr_b;
                if (((instr->alu.add.op == V3D_QPU_A_FMIN ||
                      instr->alu.add.op == V3D_QPU_A_FADD) && ordering) ||
                    ((instr->alu.add.op == V3D_QPU_A_FMAX ||
                      instr->alu.add.op == V3D_QPU_A_FADDNF) && !ordering)) {
                        uint32_t temp;

                        temp = a_unpack;
                        a_unpack = b_unpack;
                        b_unpack = temp;

                        temp = raddr_a;
                        raddr_a = raddr_b;
                        raddr_b = temp;

                        /* If we are swapping raddr_a/b we also need to swap
                         * small_imm_a/b.
                         */
                        if (instr->sig.small_imm_a || instr->sig.small_imm_b) {
                                assert(instr->sig.small_imm_a !=
                                       instr->sig.small_imm_b);
                                struct v3d_qpu_sig new_sig = instr->sig;
                                new_sig.small_imm_a = !instr->sig.small_imm_a;
                                new_sig.small_imm_b = !instr->sig.small_imm_b;
                                uint32_t sig;
                                if (!v3d_qpu_sig_pack(devinfo, &new_sig, &sig))
                                    return false;
                            /* Re-write the already-packed sig field. */
                            *packed_instr &= ~V3D_QPU_SIG_MASK;
                            *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
                        }
                }

                opcode |= a_unpack << 2;
                opcode |= b_unpack << 0;

                break;
        }

        case V3D_QPU_A_VFPACK: {
                uint32_t a_unpack;
                uint32_t b_unpack;

                /* VFPACK can't encode ABS unpack. */
                if (instr->alu.add.a.unpack == V3D_QPU_UNPACK_ABS ||
                    instr->alu.add.b.unpack == V3D_QPU_UNPACK_ABS) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &a_unpack)) {
                        return false;
                }

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.b.unpack,
                                                 &b_unpack)) {
                        return false;
                }

                /* Clear-then-set: opcode_first may already have bits here. */
                opcode = (opcode & ~(0x3 << 2)) | (a_unpack << 2);
                opcode = (opcode & ~(0x3 << 0)) | (b_unpack << 0);

                break;
        }

        case V3D_QPU_A_FFLOOR:
        case V3D_QPU_A_FROUND:
        case V3D_QPU_A_FTRUNC:
        case V3D_QPU_A_FCEIL:
        case V3D_QPU_A_FDX:
        case V3D_QPU_A_FDY: {
                uint32_t packed;

                /* Single-source float ops: pack/unpack ride in raddr_b
                 * (output pack in bits 1:0, input unpack in bits 3:2).
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                raddr_b |= packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;
                raddr_b = (raddr_b & ~(0x3 << 2)) | packed << 2;
                break;
        }

        case V3D_QPU_A_FTOIN:
        case V3D_QPU_A_FTOIZ:
        case V3D_QPU_A_FTOUZ:
        case V3D_QPU_A_FTOC:
                /* Float-to-int conversions have no output pack. */
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                uint32_t packed;
                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (packed == 0)
                        return false;

                /* NOTE(review): "x |= (x & mask) | y" never clears bits 3:2
                 * first (x |= x&mask is a no-op on those bits), unlike the
                 * FFLOOR case above which uses plain "=".  This is only
                 * correct if raddr_b's bits 3:2 are guaranteed zero here —
                 * verify against the opcode table / upstream.
                 */
                raddr_b |= (raddr_b & ~(0x3 << 2)) | packed << 2;

                break;

        case V3D_QPU_A_VFMIN:
        case V3D_QPU_A_VFMAX:
                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                    instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE) {
                        return false;
                }

                /* f16 unpack mode is folded directly into the opcode. */
                if (!v3d_qpu_float16_unpack_pack(instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed;
                break;

        case V3D_QPU_A_MOV: {
                uint32_t packed;

                if (instr->alu.add.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                /* Integer unpack mode in raddr_b bits 4:2. */
                if (!v3d_qpu_int32_unpack_pack(instr->alu.add.a.unpack,
                                               &packed)) {
                        return false;
                }

                raddr_b |= packed << 2;
                break;
        }

        case V3D_QPU_A_FMOV: {
                uint32_t packed;

                /* Output pack in raddr_b bits 1:0, unpack in bits 3:2. */
                if (!v3d_qpu_float32_pack_pack(instr->alu.add.output_pack,
                                               &packed)) {
                        return false;
                }
                raddr_b = packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.add.a.unpack,
                                                 &packed)) {
                        return false;
                }
                raddr_b |= packed << 2;
                break;
        }

        default:
                /* Ops with no pack/unpack support must not request any. */
                if (instr->alu.add.op != V3D_QPU_A_NOP &&
                    (instr->alu.add.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.add.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.add.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(raddr_a, V3D_QPU_RADDR_A);
        *packed_instr |= QPU_SET_FIELD(raddr_b, V3D_QPU_RADDR_B);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_ADD);
        *packed_instr |= QPU_SET_FIELD(waddr, V3D_QPU_WADDR_A);
        if (instr->alu.add.magic_write && !no_magic_write)
                *packed_instr |= V3D_QPU_MA;

        return true;
}
2036 
/* Packs the mul-ALU half of *instr into *packed_instr for v3d 4.2.
 * Returns false if the op/pack/unpack combination has no encoding.
 */
static bool
v3d42_qpu_mul_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t mux_a = instr->alu.mul.a.mux;
        uint32_t mux_b = instr->alu.mul.b.mux;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, v3d42_mul_ops,
                                         ARRAY_SIZE(v3d42_mul_ops),
                                         instr->alu.mul.op);

        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their mux a/b, so set
         * that here.  If mux a/b determine packing, it will be set below.
         */
        if (nsrc < 2)
                mux_b = ffs(desc->mux.b_mask) - 1;

        if (nsrc < 1)
                mux_a = ffs(desc->mux.a_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                /* Input unpacks in opcode bits 3:2 (a) and 1:0 (b). */
                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.b.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                /* Output pack is split: high bit in opcode bit 0, low bit in
                 * mux_b bit 2; source unpack in mux_b bits 1:0.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                opcode |= (packed >> 1) & 1;
                mux_b = (packed & 1) << 2;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                mux_b |= packed;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                /* SWAP_16 has a dedicated opcode; other f16 unpacks are
                 * stored biased by +4 modulo 8 in the low opcode bits.
                 */
                if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        default:
                /* Ops with no pack/unpack support must not request any. */
                if (instr->alu.mul.op != V3D_QPU_M_NOP &&
                    (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(mux_a, V3D_QPU_MUL_A);
        *packed_instr |= QPU_SET_FIELD(mux_b, V3D_QPU_MUL_B);

        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
2153 
/* Packs the mul-ALU half of @instr into @packed_instr using the v3d 7.1
 * encoding, where each mul input carries its own raddr (raddr_c/raddr_d)
 * instead of the shared raddr_a/raddr_b of earlier generations.
 *
 * Returns false if the opcode is unknown or the requested pack/unpack
 * modifiers cannot be encoded for this opcode.
 */
static bool
v3d71_qpu_mul_pack(const struct v3d_device_info *devinfo,
                   const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
{
        uint32_t raddr_c = instr->alu.mul.a.raddr;
        uint32_t raddr_d = instr->alu.mul.b.raddr;
        int nsrc = v3d_qpu_mul_op_num_src(instr->alu.mul.op);

        const struct opcode_desc *desc =
                lookup_opcode_from_instr(devinfo, v3d71_mul_ops,
                                         ARRAY_SIZE(v3d71_mul_ops),
                                         instr->alu.mul.op);
        if (!desc)
                return false;

        uint32_t opcode = desc->opcode_first;

        /* Some opcodes have a single valid value for their raddr_d, so set
         * that here.  If raddr_d determines packing, it will be set below.
         */
        if (nsrc < 2)
                raddr_d = ffsll(desc->raddr_mask) - 1;

        switch (instr->alu.mul.op) {
        case V3D_QPU_M_FMUL: {
                uint32_t packed;

                /* Output pack mode goes in bits [5:4] of the opcode. */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                /* No need for a +1 because desc->opcode_first has a 1 in this
                 * field.
                 */
                opcode += packed << 4;

                /* Input A unpack mode: opcode bits [3:2]. */
                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 2;

                /* Input B unpack mode: opcode bits [1:0]. */
                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.b.unpack,
                                                 &packed)) {
                        return false;
                }
                opcode |= packed << 0;
                break;
        }

        case V3D_QPU_M_FMOV: {
                uint32_t packed;

                /* Single-source op: pack/unpack modifiers are encoded in the
                 * otherwise-unused raddr_d field rather than in the opcode.
                 */
                if (!v3d_qpu_float32_pack_pack(instr->alu.mul.output_pack,
                                               &packed)) {
                        return false;
                }
                raddr_d |= packed;

                if (!v3d_qpu_float32_unpack_pack(devinfo,
                                                 instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                raddr_d |= packed << 2;
                break;
        }

        case V3D_QPU_M_VFMUL: {
                /* NOTE(review): VFMUL encoding has not been updated for
                 * v3d 7.1 yet; everything after this unreachable() is dead
                 * code copied from the v3d 4.2 packer, kept as a template.
                 */
                unreachable("pending v3d71 update");
                uint32_t packed;

                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_float16_unpack_pack(instr->alu.mul.a.unpack,
                                                 &packed)) {
                        return false;
                }
                if (instr->alu.mul.a.unpack == V3D_QPU_UNPACK_SWAP_16)
                        opcode = 8;
                else
                        opcode |= (packed + 4) & 7;

                if (instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)
                        return false;

                break;
        }

        case V3D_QPU_M_MOV: {
                uint32_t packed;

                /* MOV supports no output pack, only an integer input unpack
                 * encoded in raddr_d bits [*:2].
                 */
                if (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE)
                        return false;

                if (!v3d_qpu_int32_unpack_pack(instr->alu.mul.a.unpack,
                                               &packed)) {
                        return false;
                }

                raddr_d |= packed << 2;
                break;
        }

        default:
                /* Remaining ops (other than NOP) support no pack/unpack
                 * modifiers at all.
                 */
                if (instr->alu.mul.op != V3D_QPU_M_NOP &&
                    (instr->alu.mul.output_pack != V3D_QPU_PACK_NONE ||
                     instr->alu.mul.a.unpack != V3D_QPU_UNPACK_NONE ||
                     instr->alu.mul.b.unpack != V3D_QPU_UNPACK_NONE)) {
                        return false;
                }
                break;
        }

        *packed_instr |= QPU_SET_FIELD(raddr_c, V3D_QPU_RADDR_C);
        *packed_instr |= QPU_SET_FIELD(raddr_d, V3D_QPU_RADDR_D);
        *packed_instr |= QPU_SET_FIELD(opcode, V3D_QPU_OP_MUL);
        *packed_instr |= QPU_SET_FIELD(instr->alu.mul.waddr, V3D_QPU_WADDR_M);
        if (instr->alu.mul.magic_write)
                *packed_instr |= V3D_QPU_MM;

        return true;
}
2280 
2281 static bool
v3d_qpu_add_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)2282 v3d_qpu_add_pack(const struct v3d_device_info *devinfo,
2283                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2284 {
2285         if (devinfo->ver >= 71)
2286                 return v3d71_qpu_add_pack(devinfo, instr, packed_instr);
2287         else
2288                 return v3d42_qpu_add_pack(devinfo, instr, packed_instr);
2289 }
2290 
2291 static bool
v3d_qpu_mul_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)2292 v3d_qpu_mul_pack(const struct v3d_device_info *devinfo,
2293                  const struct v3d_qpu_instr *instr, uint64_t *packed_instr)
2294 {
2295         if (devinfo->ver >= 71)
2296                 return v3d71_qpu_mul_pack(devinfo, instr, packed_instr);
2297         else
2298                 return v3d42_qpu_mul_pack(devinfo, instr, packed_instr);
2299 }
2300 
2301 static bool
v3d_qpu_instr_unpack_alu(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)2302 v3d_qpu_instr_unpack_alu(const struct v3d_device_info *devinfo,
2303                          uint64_t packed_instr,
2304                          struct v3d_qpu_instr *instr)
2305 {
2306         instr->type = V3D_QPU_INSTR_TYPE_ALU;
2307 
2308         if (!v3d_qpu_sig_unpack(devinfo,
2309                                 QPU_GET_FIELD(packed_instr, V3D_QPU_SIG),
2310                                 &instr->sig))
2311                 return false;
2312 
2313         uint32_t packed_cond = QPU_GET_FIELD(packed_instr, V3D_QPU_COND);
2314         if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
2315                 instr->sig_addr = packed_cond & ~V3D_QPU_COND_SIG_MAGIC_ADDR;
2316                 instr->sig_magic = packed_cond & V3D_QPU_COND_SIG_MAGIC_ADDR;
2317 
2318                 instr->flags.ac = V3D_QPU_COND_NONE;
2319                 instr->flags.mc = V3D_QPU_COND_NONE;
2320                 instr->flags.apf = V3D_QPU_PF_NONE;
2321                 instr->flags.mpf = V3D_QPU_PF_NONE;
2322                 instr->flags.auf = V3D_QPU_UF_NONE;
2323                 instr->flags.muf = V3D_QPU_UF_NONE;
2324         } else {
2325                 if (!v3d_qpu_flags_unpack(devinfo, packed_cond, &instr->flags))
2326                         return false;
2327         }
2328 
2329         if (devinfo->ver <= 71) {
2330                 /*
2331                  * For v3d71 this will be set on add/mul unpack, as raddr are
2332                  * now part of v3d_qpu_input
2333                  */
2334                 instr->raddr_a = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_A);
2335                 instr->raddr_b = QPU_GET_FIELD(packed_instr, V3D_QPU_RADDR_B);
2336         }
2337 
2338         if (!v3d_qpu_add_unpack(devinfo, packed_instr, instr))
2339                 return false;
2340 
2341         if (!v3d_qpu_mul_unpack(devinfo, packed_instr, instr))
2342                 return false;
2343 
2344         return true;
2345 }
2346 
2347 static bool
v3d_qpu_instr_unpack_branch(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)2348 v3d_qpu_instr_unpack_branch(const struct v3d_device_info *devinfo,
2349                             uint64_t packed_instr,
2350                             struct v3d_qpu_instr *instr)
2351 {
2352         instr->type = V3D_QPU_INSTR_TYPE_BRANCH;
2353 
2354         uint32_t cond = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_COND);
2355         if (cond == 0)
2356                 instr->branch.cond = V3D_QPU_BRANCH_COND_ALWAYS;
2357         else if (V3D_QPU_BRANCH_COND_A0 + (cond - 2) <=
2358                  V3D_QPU_BRANCH_COND_ALLNA)
2359                 instr->branch.cond = V3D_QPU_BRANCH_COND_A0 + (cond - 2);
2360         else
2361                 return false;
2362 
2363         uint32_t msfign = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_MSFIGN);
2364         if (msfign == 3)
2365                 return false;
2366         instr->branch.msfign = msfign;
2367 
2368         instr->branch.bdi = QPU_GET_FIELD(packed_instr, V3D_QPU_BRANCH_BDI);
2369 
2370         instr->branch.ub = packed_instr & V3D_QPU_BRANCH_UB;
2371         if (instr->branch.ub) {
2372                 instr->branch.bdu = QPU_GET_FIELD(packed_instr,
2373                                                   V3D_QPU_BRANCH_BDU);
2374         }
2375 
2376         instr->branch.raddr_a = QPU_GET_FIELD(packed_instr,
2377                                               V3D_QPU_RADDR_A);
2378 
2379         instr->branch.offset = 0;
2380 
2381         instr->branch.offset +=
2382                 QPU_GET_FIELD(packed_instr,
2383                               V3D_QPU_BRANCH_ADDR_LOW) << 3;
2384 
2385         instr->branch.offset +=
2386                 QPU_GET_FIELD(packed_instr,
2387                               V3D_QPU_BRANCH_ADDR_HIGH) << 24;
2388 
2389         return true;
2390 }
2391 
2392 bool
v3d_qpu_instr_unpack(const struct v3d_device_info * devinfo,uint64_t packed_instr,struct v3d_qpu_instr * instr)2393 v3d_qpu_instr_unpack(const struct v3d_device_info *devinfo,
2394                      uint64_t packed_instr,
2395                      struct v3d_qpu_instr *instr)
2396 {
2397         if (QPU_GET_FIELD(packed_instr, V3D_QPU_OP_MUL) != 0) {
2398                 return v3d_qpu_instr_unpack_alu(devinfo, packed_instr, instr);
2399         } else {
2400                 uint32_t sig = QPU_GET_FIELD(packed_instr, V3D_QPU_SIG);
2401 
2402                 if ((sig & 24) == 16) {
2403                         return v3d_qpu_instr_unpack_branch(devinfo, packed_instr,
2404                                                            instr);
2405                 } else {
2406                         return false;
2407                 }
2408         }
2409 }
2410 
2411 static bool
v3d_qpu_instr_pack_alu(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)2412 v3d_qpu_instr_pack_alu(const struct v3d_device_info *devinfo,
2413                        const struct v3d_qpu_instr *instr,
2414                        uint64_t *packed_instr)
2415 {
2416         uint32_t sig;
2417         if (!v3d_qpu_sig_pack(devinfo, &instr->sig, &sig))
2418                 return false;
2419         *packed_instr |= QPU_SET_FIELD(sig, V3D_QPU_SIG);
2420 
2421         if (instr->type == V3D_QPU_INSTR_TYPE_ALU) {
2422                 if (devinfo->ver < 71) {
2423                         /*
2424                          * For v3d71 this will be set on add/mul unpack, as
2425                          * raddr are now part of v3d_qpu_input
2426                          */
2427                         *packed_instr |= QPU_SET_FIELD(instr->raddr_a, V3D_QPU_RADDR_A);
2428                         *packed_instr |= QPU_SET_FIELD(instr->raddr_b, V3D_QPU_RADDR_B);
2429                 }
2430 
2431                 if (!v3d_qpu_add_pack(devinfo, instr, packed_instr))
2432                         return false;
2433                 if (!v3d_qpu_mul_pack(devinfo, instr, packed_instr))
2434                         return false;
2435 
2436                 uint32_t flags;
2437                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig)) {
2438                         if (instr->flags.ac != V3D_QPU_COND_NONE ||
2439                             instr->flags.mc != V3D_QPU_COND_NONE ||
2440                             instr->flags.apf != V3D_QPU_PF_NONE ||
2441                             instr->flags.mpf != V3D_QPU_PF_NONE ||
2442                             instr->flags.auf != V3D_QPU_UF_NONE ||
2443                             instr->flags.muf != V3D_QPU_UF_NONE) {
2444                                 return false;
2445                         }
2446 
2447                         flags = instr->sig_addr;
2448                         if (instr->sig_magic)
2449                                 flags |= V3D_QPU_COND_SIG_MAGIC_ADDR;
2450                 } else {
2451                         if (!v3d_qpu_flags_pack(devinfo, &instr->flags, &flags))
2452                                 return false;
2453                 }
2454 
2455                 *packed_instr |= QPU_SET_FIELD(flags, V3D_QPU_COND);
2456         } else {
2457                 if (v3d_qpu_sig_writes_address(devinfo, &instr->sig))
2458                         return false;
2459         }
2460 
2461         return true;
2462 }
2463 
2464 static bool
v3d_qpu_instr_pack_branch(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)2465 v3d_qpu_instr_pack_branch(const struct v3d_device_info *devinfo,
2466                           const struct v3d_qpu_instr *instr,
2467                           uint64_t *packed_instr)
2468 {
2469         *packed_instr |= QPU_SET_FIELD(16, V3D_QPU_SIG);
2470 
2471         if (instr->branch.cond != V3D_QPU_BRANCH_COND_ALWAYS) {
2472                 *packed_instr |= QPU_SET_FIELD(2 + (instr->branch.cond -
2473                                                     V3D_QPU_BRANCH_COND_A0),
2474                                                V3D_QPU_BRANCH_COND);
2475         }
2476 
2477         *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2478                                        V3D_QPU_BRANCH_MSFIGN);
2479 
2480         *packed_instr |= QPU_SET_FIELD(instr->branch.bdi,
2481                                        V3D_QPU_BRANCH_BDI);
2482 
2483         if (instr->branch.ub) {
2484                 *packed_instr |= V3D_QPU_BRANCH_UB;
2485                 *packed_instr |= QPU_SET_FIELD(instr->branch.bdu,
2486                                                V3D_QPU_BRANCH_BDU);
2487         }
2488 
2489         switch (instr->branch.bdi) {
2490         case V3D_QPU_BRANCH_DEST_ABS:
2491         case V3D_QPU_BRANCH_DEST_REL:
2492                 *packed_instr |= QPU_SET_FIELD(instr->branch.msfign,
2493                                                V3D_QPU_BRANCH_MSFIGN);
2494 
2495                 *packed_instr |= QPU_SET_FIELD((instr->branch.offset &
2496                                                 ~0xff000000) >> 3,
2497                                                V3D_QPU_BRANCH_ADDR_LOW);
2498 
2499                 *packed_instr |= QPU_SET_FIELD(instr->branch.offset >> 24,
2500                                                V3D_QPU_BRANCH_ADDR_HIGH);
2501                 break;
2502         default:
2503                 break;
2504         }
2505 
2506         if (instr->branch.bdi == V3D_QPU_BRANCH_DEST_REGFILE ||
2507             instr->branch.bdu == V3D_QPU_BRANCH_DEST_REGFILE) {
2508                 *packed_instr |= QPU_SET_FIELD(instr->branch.raddr_a,
2509                                                V3D_QPU_RADDR_A);
2510         }
2511 
2512         return true;
2513 }
2514 
2515 bool
v3d_qpu_instr_pack(const struct v3d_device_info * devinfo,const struct v3d_qpu_instr * instr,uint64_t * packed_instr)2516 v3d_qpu_instr_pack(const struct v3d_device_info *devinfo,
2517                    const struct v3d_qpu_instr *instr,
2518                    uint64_t *packed_instr)
2519 {
2520         *packed_instr = 0;
2521 
2522         switch (instr->type) {
2523         case V3D_QPU_INSTR_TYPE_ALU:
2524                 return v3d_qpu_instr_pack_alu(devinfo, instr, packed_instr);
2525         case V3D_QPU_INSTR_TYPE_BRANCH:
2526                 return v3d_qpu_instr_pack_branch(devinfo, instr, packed_instr);
2527         default:
2528                 return false;
2529         }
2530 }
2531