1*61046927SAndroid Build Coastguard Worker /*
2*61046927SAndroid Build Coastguard Worker * Copyright © 2022 Collabora Ltd.
3*61046927SAndroid Build Coastguard Worker * SPDX-License-Identifier: MIT
4*61046927SAndroid Build Coastguard Worker */
5*61046927SAndroid Build Coastguard Worker #ifndef MME_BUILDER_H
6*61046927SAndroid Build Coastguard Worker #define MME_BUILDER_H
7*61046927SAndroid Build Coastguard Worker
8*61046927SAndroid Build Coastguard Worker #include "mme_value.h"
9*61046927SAndroid Build Coastguard Worker #include "mme_tu104.h"
10*61046927SAndroid Build Coastguard Worker #include "nv_device_info.h"
11*61046927SAndroid Build Coastguard Worker
12*61046927SAndroid Build Coastguard Worker #include "util/bitscan.h"
13*61046927SAndroid Build Coastguard Worker #include "util/enum_operators.h"
14*61046927SAndroid Build Coastguard Worker
15*61046927SAndroid Build Coastguard Worker #ifdef __cplusplus
16*61046927SAndroid Build Coastguard Worker extern "C" {
17*61046927SAndroid Build Coastguard Worker #endif
18*61046927SAndroid Build Coastguard Worker
/**
 * Generation-independent ALU opcodes.
 *
 * These are the values passed as the `op` arguments of mme_alu_to() and
 * mme_alu64_to(), which hand them on unchanged to the tu104 or fermi backend.
 * Enumerator values are implicit and sequential, starting at zero.
 */
enum mme_alu_op {
   /* Add / subtract.  64-bit helpers pair ADD with ADDC and SUB with SUBB
    * for the low and high halves respectively. */
   MME_ALU_OP_ADD = 0,
   MME_ALU_OP_ADDC,
   MME_ALU_OP_SUB,
   MME_ALU_OP_SUBB,

   /* Multiplies.  The 32x32->64 helpers pair MUL or MULU (low half) with
    * MULH (high half). */
   MME_ALU_OP_MUL,
   MME_ALU_OP_MULH,
   MME_ALU_OP_MULU,

   MME_ALU_OP_CLZ,
   MME_ALU_OP_SLL,
   MME_ALU_OP_SRL,
   MME_ALU_OP_SRA,

   MME_ALU_OP_NOT,
   MME_ALU_OP_AND,
   MME_ALU_OP_AND_NOT,
   MME_ALU_OP_NAND,
   MME_ALU_OP_OR,
   MME_ALU_OP_XOR,

   MME_ALU_OP_SLT,
   MME_ALU_OP_SLTU,
   MME_ALU_OP_SLE,
   MME_ALU_OP_SLEU,
   MME_ALU_OP_SEQ,

   MME_ALU_OP_DREAD,
   MME_ALU_OP_DWRITE,
};
45*61046927SAndroid Build Coastguard Worker
/**
 * Comparison operators.  Values are implicit and sequential from zero.
 */
enum mme_cmp_op {
   MME_CMP_OP_LT = 0,
   MME_CMP_OP_LTU,
   MME_CMP_OP_LE,
   MME_CMP_OP_LEU,
   MME_CMP_OP_EQ,
};
53*61046927SAndroid Build Coastguard Worker
/**
 * Kinds of control-flow construct recorded in struct mme_cf.
 * Values are implicit and sequential from zero.
 */
enum mme_cf_type {
   MME_CF_TYPE_IF = 0,
   MME_CF_TYPE_LOOP,
   MME_CF_TYPE_WHILE,
};
59*61046927SAndroid Build Coastguard Worker
/**
 * Record describing one control-flow construct.
 */
struct mme_cf {
   /* Which kind of construct this is (if, loop or while). */
   enum mme_cf_type type;
   /* NOTE(review): presumably the instruction index at which the construct
    * starts -- confirm against the backend builders that fill this in. */
   uint16_t start_ip;
};
64*61046927SAndroid Build Coastguard Worker
65*61046927SAndroid Build Coastguard Worker struct mme_builder;
66*61046927SAndroid Build Coastguard Worker
67*61046927SAndroid Build Coastguard Worker #include "mme_tu104_builder.h"
68*61046927SAndroid Build Coastguard Worker #include "mme_fermi_builder.h"
69*61046927SAndroid Build Coastguard Worker
/*
 * Thresholds compared against nv_device_info::cls_eng3d to choose a backend:
 * a class >= MME_CLS_TURING uses the tu104 builder, otherwise a class
 * >= MME_CLS_FERMI uses the fermi builder.
 */
#define MME_CLS_FERMI 0x9000
#define MME_CLS_TURING 0xc500
72*61046927SAndroid Build Coastguard Worker
/**
 * Generation-independent macro builder.
 *
 * Initialise with mme_builder_init().  Every wrapper in this header inspects
 * devinfo->cls_eng3d to decide which backend to forward to.
 */
struct mme_builder {
   /* Device the macro is built for; set by mme_builder_init(). */
   const struct nv_device_info *devinfo;
   /* Register allocator behind mme_alloc_reg() / mme_free_reg(). */
   struct mme_reg_alloc reg_alloc;
   /* Backend state.  Only the member matching the device class is used:
    * tu104 for classes >= MME_CLS_TURING, fermi otherwise. */
   union {
      struct mme_tu104_builder tu104;
      struct mme_fermi_builder fermi;
   };
};
81*61046927SAndroid Build Coastguard Worker
82*61046927SAndroid Build Coastguard Worker static inline void
mme_builder_init(struct mme_builder * b,const struct nv_device_info * dev)83*61046927SAndroid Build Coastguard Worker mme_builder_init(struct mme_builder *b, const struct nv_device_info *dev)
84*61046927SAndroid Build Coastguard Worker {
85*61046927SAndroid Build Coastguard Worker memset(b, 0, sizeof(*b));
86*61046927SAndroid Build Coastguard Worker b->devinfo = dev;
87*61046927SAndroid Build Coastguard Worker
88*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
89*61046927SAndroid Build Coastguard Worker mme_tu104_builder_init(b);
90*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
91*61046927SAndroid Build Coastguard Worker mme_fermi_builder_init(b);
92*61046927SAndroid Build Coastguard Worker else
93*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
94*61046927SAndroid Build Coastguard Worker }
95*61046927SAndroid Build Coastguard Worker
96*61046927SAndroid Build Coastguard Worker static inline uint32_t *
mme_builder_finish(struct mme_builder * b,size_t * size_out)97*61046927SAndroid Build Coastguard Worker mme_builder_finish(struct mme_builder *b, size_t *size_out)
98*61046927SAndroid Build Coastguard Worker {
99*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
100*61046927SAndroid Build Coastguard Worker return mme_tu104_builder_finish(&b->tu104, size_out);
101*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
102*61046927SAndroid Build Coastguard Worker return mme_fermi_builder_finish(&b->fermi, size_out);
103*61046927SAndroid Build Coastguard Worker else
104*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
105*61046927SAndroid Build Coastguard Worker }
106*61046927SAndroid Build Coastguard Worker
107*61046927SAndroid Build Coastguard Worker static inline void
mme_builder_dump(struct mme_builder * b,FILE * fp)108*61046927SAndroid Build Coastguard Worker mme_builder_dump(struct mme_builder *b, FILE *fp)
109*61046927SAndroid Build Coastguard Worker {
110*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
111*61046927SAndroid Build Coastguard Worker mme_tu104_builder_dump(b, fp);
112*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
113*61046927SAndroid Build Coastguard Worker mme_fermi_builder_dump(b, fp);
114*61046927SAndroid Build Coastguard Worker else
115*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
116*61046927SAndroid Build Coastguard Worker }
117*61046927SAndroid Build Coastguard Worker
118*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_alloc_reg(struct mme_builder * b)119*61046927SAndroid Build Coastguard Worker mme_alloc_reg(struct mme_builder *b)
120*61046927SAndroid Build Coastguard Worker {
121*61046927SAndroid Build Coastguard Worker return mme_reg_alloc_alloc(&b->reg_alloc);
122*61046927SAndroid Build Coastguard Worker }
123*61046927SAndroid Build Coastguard Worker
124*61046927SAndroid Build Coastguard Worker static inline void
mme_realloc_reg(struct mme_builder * b,struct mme_value value)125*61046927SAndroid Build Coastguard Worker mme_realloc_reg(struct mme_builder *b, struct mme_value value)
126*61046927SAndroid Build Coastguard Worker {
127*61046927SAndroid Build Coastguard Worker return mme_reg_alloc_realloc(&b->reg_alloc, value);
128*61046927SAndroid Build Coastguard Worker }
129*61046927SAndroid Build Coastguard Worker
130*61046927SAndroid Build Coastguard Worker static inline void
mme_free_reg(struct mme_builder * b,struct mme_value val)131*61046927SAndroid Build Coastguard Worker mme_free_reg(struct mme_builder *b, struct mme_value val)
132*61046927SAndroid Build Coastguard Worker {
133*61046927SAndroid Build Coastguard Worker mme_reg_alloc_free(&b->reg_alloc, val);
134*61046927SAndroid Build Coastguard Worker }
135*61046927SAndroid Build Coastguard Worker
136*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_alloc_reg64(struct mme_builder * b)137*61046927SAndroid Build Coastguard Worker mme_alloc_reg64(struct mme_builder *b)
138*61046927SAndroid Build Coastguard Worker {
139*61046927SAndroid Build Coastguard Worker struct mme_value lo = mme_alloc_reg(b);
140*61046927SAndroid Build Coastguard Worker struct mme_value hi = mme_alloc_reg(b);
141*61046927SAndroid Build Coastguard Worker return mme_value64(lo, hi);
142*61046927SAndroid Build Coastguard Worker }
143*61046927SAndroid Build Coastguard Worker
144*61046927SAndroid Build Coastguard Worker static inline void
mme_free_reg64(struct mme_builder * b,struct mme_value64 val)145*61046927SAndroid Build Coastguard Worker mme_free_reg64(struct mme_builder *b, struct mme_value64 val)
146*61046927SAndroid Build Coastguard Worker {
147*61046927SAndroid Build Coastguard Worker mme_reg_alloc_free(&b->reg_alloc, val.lo);
148*61046927SAndroid Build Coastguard Worker mme_reg_alloc_free(&b->reg_alloc, val.hi);
149*61046927SAndroid Build Coastguard Worker }
150*61046927SAndroid Build Coastguard Worker
151*61046927SAndroid Build Coastguard Worker static inline void
mme_alu_to(struct mme_builder * b,struct mme_value dst,enum mme_alu_op op,struct mme_value x,struct mme_value y)152*61046927SAndroid Build Coastguard Worker mme_alu_to(struct mme_builder *b,
153*61046927SAndroid Build Coastguard Worker struct mme_value dst,
154*61046927SAndroid Build Coastguard Worker enum mme_alu_op op,
155*61046927SAndroid Build Coastguard Worker struct mme_value x,
156*61046927SAndroid Build Coastguard Worker struct mme_value y)
157*61046927SAndroid Build Coastguard Worker {
158*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
159*61046927SAndroid Build Coastguard Worker mme_tu104_alu_to(b, dst, op, x, y);
160*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
161*61046927SAndroid Build Coastguard Worker mme_fermi_alu_to(b, dst, op, x, y);
162*61046927SAndroid Build Coastguard Worker else
163*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
164*61046927SAndroid Build Coastguard Worker }
165*61046927SAndroid Build Coastguard Worker
166*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_alu(struct mme_builder * b,enum mme_alu_op op,struct mme_value x,struct mme_value y)167*61046927SAndroid Build Coastguard Worker mme_alu(struct mme_builder *b,
168*61046927SAndroid Build Coastguard Worker enum mme_alu_op op,
169*61046927SAndroid Build Coastguard Worker struct mme_value x,
170*61046927SAndroid Build Coastguard Worker struct mme_value y)
171*61046927SAndroid Build Coastguard Worker {
172*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_alloc_reg(b);
173*61046927SAndroid Build Coastguard Worker mme_alu_to(b, dst, op, x, y);
174*61046927SAndroid Build Coastguard Worker return dst;
175*61046927SAndroid Build Coastguard Worker }
176*61046927SAndroid Build Coastguard Worker
177*61046927SAndroid Build Coastguard Worker static inline void
mme_alu_no_dst(struct mme_builder * b,enum mme_alu_op op,struct mme_value x,struct mme_value y)178*61046927SAndroid Build Coastguard Worker mme_alu_no_dst(struct mme_builder *b,
179*61046927SAndroid Build Coastguard Worker enum mme_alu_op op,
180*61046927SAndroid Build Coastguard Worker struct mme_value x,
181*61046927SAndroid Build Coastguard Worker struct mme_value y)
182*61046927SAndroid Build Coastguard Worker {
183*61046927SAndroid Build Coastguard Worker mme_alu_to(b, mme_zero(), op, x, y);
184*61046927SAndroid Build Coastguard Worker }
185*61046927SAndroid Build Coastguard Worker
186*61046927SAndroid Build Coastguard Worker static inline void
mme_alu64_to(struct mme_builder * b,struct mme_value64 dst,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)187*61046927SAndroid Build Coastguard Worker mme_alu64_to(struct mme_builder *b,
188*61046927SAndroid Build Coastguard Worker struct mme_value64 dst,
189*61046927SAndroid Build Coastguard Worker enum mme_alu_op op_lo,
190*61046927SAndroid Build Coastguard Worker enum mme_alu_op op_hi,
191*61046927SAndroid Build Coastguard Worker struct mme_value64 x,
192*61046927SAndroid Build Coastguard Worker struct mme_value64 y)
193*61046927SAndroid Build Coastguard Worker {
194*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
195*61046927SAndroid Build Coastguard Worker mme_tu104_alu64_to(b, dst, op_lo, op_hi, x, y);
196*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
197*61046927SAndroid Build Coastguard Worker mme_fermi_alu64_to(b, dst, op_lo, op_hi, x, y);
198*61046927SAndroid Build Coastguard Worker else
199*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
200*61046927SAndroid Build Coastguard Worker }
201*61046927SAndroid Build Coastguard Worker
202*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_alu64(struct mme_builder * b,enum mme_alu_op op_lo,enum mme_alu_op op_hi,struct mme_value64 x,struct mme_value64 y)203*61046927SAndroid Build Coastguard Worker mme_alu64(struct mme_builder *b,
204*61046927SAndroid Build Coastguard Worker enum mme_alu_op op_lo, enum mme_alu_op op_hi,
205*61046927SAndroid Build Coastguard Worker struct mme_value64 x, struct mme_value64 y)
206*61046927SAndroid Build Coastguard Worker {
207*61046927SAndroid Build Coastguard Worker struct mme_value64 dst = {
208*61046927SAndroid Build Coastguard Worker mme_alloc_reg(b),
209*61046927SAndroid Build Coastguard Worker mme_alloc_reg(b),
210*61046927SAndroid Build Coastguard Worker };
211*61046927SAndroid Build Coastguard Worker mme_alu64_to(b, dst, op_lo, op_hi, x, y);
212*61046927SAndroid Build Coastguard Worker return dst;
213*61046927SAndroid Build Coastguard Worker }
214*61046927SAndroid Build Coastguard Worker
/*
 * Define the helper pair for a single-source ALU op:
 *
 *    mme_<op>_to(b, dst, x)   writes the result to dst
 *    mme_<op>(b, x)           allocates a register for the result
 *
 * In both cases the second ALU source is mme_zero().
 */
#define MME_DEF_ALU1(op, OP)                                               \
static inline void                                                         \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,                 \
              struct mme_value x)                                          \
{                                                                          \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, mme_zero());                     \
}                                                                          \
                                                                           \
static inline struct mme_value                                             \
mme_##op(struct mme_builder *b,                                            \
         struct mme_value x)                                               \
{                                                                          \
   struct mme_value dst = mme_alloc_reg(b);                                \
   mme_##op##_to(b, dst, x);                                               \
   return dst;                                                             \
}
229*61046927SAndroid Build Coastguard Worker
/*
 * Define the helper pair for a two-source ALU op:
 *
 *    mme_<op>_to(b, dst, x, y)   writes the result to dst
 *    mme_<op>(b, x, y)           allocates a register for the result
 */
#define MME_DEF_ALU2(op, OP)                                               \
static inline void                                                         \
mme_##op##_to(struct mme_builder *b, struct mme_value dst,                 \
              struct mme_value x, struct mme_value y)                      \
{                                                                          \
   mme_alu_to(b, dst, MME_ALU_OP_##OP, x, y);                              \
}                                                                          \
                                                                           \
static inline struct mme_value                                             \
mme_##op(struct mme_builder *b,                                            \
         struct mme_value x, struct mme_value y)                           \
{                                                                          \
   struct mme_value dst = mme_alloc_reg(b);                                \
   mme_##op##_to(b, dst, x, y);                                            \
   return dst;                                                             \
}
244*61046927SAndroid Build Coastguard Worker
/* Single-source helpers.  mme_mov() is implemented as ADD with zero. */
MME_DEF_ALU1(mov,   ADD);
MME_DEF_ALU1(clz,   CLZ);
MME_DEF_ALU1(not,   NOT);
MME_DEF_ALU1(dread, DREAD);

/* Two-source helpers. */
MME_DEF_ALU2(add,     ADD);
MME_DEF_ALU2(sub,     SUB);
MME_DEF_ALU2(mul,     MUL);
MME_DEF_ALU2(sll,     SLL);
MME_DEF_ALU2(srl,     SRL);
MME_DEF_ALU2(sra,     SRA);
MME_DEF_ALU2(and,     AND);
MME_DEF_ALU2(and_not, AND_NOT);
MME_DEF_ALU2(nand,    NAND);
MME_DEF_ALU2(or,      OR);
MME_DEF_ALU2(xor,     XOR);
MME_DEF_ALU2(slt,     SLT);
MME_DEF_ALU2(sltu,    SLTU);
MME_DEF_ALU2(sle,     SLE);
MME_DEF_ALU2(sleu,    SLEU);
MME_DEF_ALU2(seq,     SEQ);

/* The generator macros are private to this header. */
#undef MME_DEF_ALU1
#undef MME_DEF_ALU2
268*61046927SAndroid Build Coastguard Worker
269*61046927SAndroid Build Coastguard Worker static inline void
mme_mov64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value64 x)270*61046927SAndroid Build Coastguard Worker mme_mov64_to(struct mme_builder *b, struct mme_value64 dst,
271*61046927SAndroid Build Coastguard Worker struct mme_value64 x)
272*61046927SAndroid Build Coastguard Worker {
273*61046927SAndroid Build Coastguard Worker mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
274*61046927SAndroid Build Coastguard Worker }
275*61046927SAndroid Build Coastguard Worker
276*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_mov64(struct mme_builder * b,struct mme_value64 x)277*61046927SAndroid Build Coastguard Worker mme_mov64(struct mme_builder *b, struct mme_value64 x)
278*61046927SAndroid Build Coastguard Worker {
279*61046927SAndroid Build Coastguard Worker return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADD, x, mme_imm64(0));
280*61046927SAndroid Build Coastguard Worker }
281*61046927SAndroid Build Coastguard Worker
282*61046927SAndroid Build Coastguard Worker static inline void
mme_add64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value64 x,struct mme_value64 y)283*61046927SAndroid Build Coastguard Worker mme_add64_to(struct mme_builder *b, struct mme_value64 dst,
284*61046927SAndroid Build Coastguard Worker struct mme_value64 x, struct mme_value64 y)
285*61046927SAndroid Build Coastguard Worker {
286*61046927SAndroid Build Coastguard Worker mme_alu64_to(b, dst, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
287*61046927SAndroid Build Coastguard Worker }
288*61046927SAndroid Build Coastguard Worker
289*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_add64(struct mme_builder * b,struct mme_value64 x,struct mme_value64 y)290*61046927SAndroid Build Coastguard Worker mme_add64(struct mme_builder *b,
291*61046927SAndroid Build Coastguard Worker struct mme_value64 x, struct mme_value64 y)
292*61046927SAndroid Build Coastguard Worker {
293*61046927SAndroid Build Coastguard Worker return mme_alu64(b, MME_ALU_OP_ADD, MME_ALU_OP_ADDC, x, y);
294*61046927SAndroid Build Coastguard Worker }
295*61046927SAndroid Build Coastguard Worker
296*61046927SAndroid Build Coastguard Worker static inline void
mme_sub64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value64 x,struct mme_value64 y)297*61046927SAndroid Build Coastguard Worker mme_sub64_to(struct mme_builder *b, struct mme_value64 dst,
298*61046927SAndroid Build Coastguard Worker struct mme_value64 x, struct mme_value64 y)
299*61046927SAndroid Build Coastguard Worker {
300*61046927SAndroid Build Coastguard Worker mme_alu64_to(b, dst, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
301*61046927SAndroid Build Coastguard Worker }
302*61046927SAndroid Build Coastguard Worker
303*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_sub64(struct mme_builder * b,struct mme_value64 x,struct mme_value64 y)304*61046927SAndroid Build Coastguard Worker mme_sub64(struct mme_builder *b,
305*61046927SAndroid Build Coastguard Worker struct mme_value64 x, struct mme_value64 y)
306*61046927SAndroid Build Coastguard Worker {
307*61046927SAndroid Build Coastguard Worker return mme_alu64(b, MME_ALU_OP_SUB, MME_ALU_OP_SUBB, x, y);
308*61046927SAndroid Build Coastguard Worker }
309*61046927SAndroid Build Coastguard Worker
310*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_mul_32x32_32_free_srcs(struct mme_builder * b,struct mme_value x,struct mme_value y)311*61046927SAndroid Build Coastguard Worker mme_mul_32x32_32_free_srcs(struct mme_builder *b,
312*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y)
313*61046927SAndroid Build Coastguard Worker {
314*61046927SAndroid Build Coastguard Worker assert(x.type == MME_VALUE_TYPE_REG);
315*61046927SAndroid Build Coastguard Worker assert(y.type == MME_VALUE_TYPE_REG);
316*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
317*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_mul(b, x, y);
318*61046927SAndroid Build Coastguard Worker mme_free_reg(b, x);
319*61046927SAndroid Build Coastguard Worker mme_free_reg(b, y);
320*61046927SAndroid Build Coastguard Worker return dst;
321*61046927SAndroid Build Coastguard Worker } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
322*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_alloc_reg(b);
323*61046927SAndroid Build Coastguard Worker mme_fermi_umul_32x32_32_to_free_srcs(b, dst, x, y);
324*61046927SAndroid Build Coastguard Worker return dst;
325*61046927SAndroid Build Coastguard Worker } else {
326*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
327*61046927SAndroid Build Coastguard Worker }
328*61046927SAndroid Build Coastguard Worker }
329*61046927SAndroid Build Coastguard Worker
330*61046927SAndroid Build Coastguard Worker static inline void
mme_imul_32x32_64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value x,struct mme_value y)331*61046927SAndroid Build Coastguard Worker mme_imul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
332*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y)
333*61046927SAndroid Build Coastguard Worker {
334*61046927SAndroid Build Coastguard Worker mme_alu64_to(b, dst, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
335*61046927SAndroid Build Coastguard Worker mme_value64(x, mme_zero()),
336*61046927SAndroid Build Coastguard Worker mme_value64(y, mme_zero()));
337*61046927SAndroid Build Coastguard Worker }
338*61046927SAndroid Build Coastguard Worker
339*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_imul_32x32_64(struct mme_builder * b,struct mme_value x,struct mme_value y)340*61046927SAndroid Build Coastguard Worker mme_imul_32x32_64(struct mme_builder *b,
341*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y)
342*61046927SAndroid Build Coastguard Worker {
343*61046927SAndroid Build Coastguard Worker return mme_alu64(b, MME_ALU_OP_MUL, MME_ALU_OP_MULH,
344*61046927SAndroid Build Coastguard Worker mme_value64(x, mme_zero()),
345*61046927SAndroid Build Coastguard Worker mme_value64(y, mme_zero()));
346*61046927SAndroid Build Coastguard Worker }
347*61046927SAndroid Build Coastguard Worker
348*61046927SAndroid Build Coastguard Worker static inline void
mme_umul_32x32_64_to(struct mme_builder * b,struct mme_value64 dst,struct mme_value x,struct mme_value y)349*61046927SAndroid Build Coastguard Worker mme_umul_32x32_64_to(struct mme_builder *b, struct mme_value64 dst,
350*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y)
351*61046927SAndroid Build Coastguard Worker {
352*61046927SAndroid Build Coastguard Worker assert(b->devinfo->cls_eng3d >= MME_CLS_TURING);
353*61046927SAndroid Build Coastguard Worker mme_alu64_to(b, dst, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
354*61046927SAndroid Build Coastguard Worker mme_value64(x, mme_zero()),
355*61046927SAndroid Build Coastguard Worker mme_value64(y, mme_zero()));
356*61046927SAndroid Build Coastguard Worker }
357*61046927SAndroid Build Coastguard Worker
358*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_umul_32x32_64(struct mme_builder * b,struct mme_value x,struct mme_value y)359*61046927SAndroid Build Coastguard Worker mme_umul_32x32_64(struct mme_builder *b,
360*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y)
361*61046927SAndroid Build Coastguard Worker {
362*61046927SAndroid Build Coastguard Worker assert(b->devinfo->cls_eng3d >= MME_CLS_TURING);
363*61046927SAndroid Build Coastguard Worker return mme_alu64(b, MME_ALU_OP_MULU, MME_ALU_OP_MULH,
364*61046927SAndroid Build Coastguard Worker mme_value64(x, mme_zero()),
365*61046927SAndroid Build Coastguard Worker mme_value64(y, mme_zero()));
366*61046927SAndroid Build Coastguard Worker }
367*61046927SAndroid Build Coastguard Worker
368*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_umul_32x32_64_free_srcs(struct mme_builder * b,struct mme_value x,struct mme_value y)369*61046927SAndroid Build Coastguard Worker mme_umul_32x32_64_free_srcs(struct mme_builder *b,
370*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y)
371*61046927SAndroid Build Coastguard Worker {
372*61046927SAndroid Build Coastguard Worker assert(x.type == MME_VALUE_TYPE_REG);
373*61046927SAndroid Build Coastguard Worker assert(y.type == MME_VALUE_TYPE_REG);
374*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
375*61046927SAndroid Build Coastguard Worker struct mme_value64 dst = mme_umul_32x32_64(b, x, y);
376*61046927SAndroid Build Coastguard Worker mme_free_reg(b, x);
377*61046927SAndroid Build Coastguard Worker mme_free_reg(b, y);
378*61046927SAndroid Build Coastguard Worker return dst;
379*61046927SAndroid Build Coastguard Worker } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
380*61046927SAndroid Build Coastguard Worker struct mme_value y_hi = mme_mov(b, mme_zero());
381*61046927SAndroid Build Coastguard Worker struct mme_value64 dst = mme_alloc_reg64(b);
382*61046927SAndroid Build Coastguard Worker mme_fermi_umul_32x64_64_to_free_srcs(b, dst, x, mme_value64(y, y_hi));
383*61046927SAndroid Build Coastguard Worker return dst;
384*61046927SAndroid Build Coastguard Worker } else {
385*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
386*61046927SAndroid Build Coastguard Worker }
387*61046927SAndroid Build Coastguard Worker }
388*61046927SAndroid Build Coastguard Worker
389*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_umul_32x64_64_free_srcs(struct mme_builder * b,struct mme_value x,struct mme_value64 y)390*61046927SAndroid Build Coastguard Worker mme_umul_32x64_64_free_srcs(struct mme_builder *b,
391*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value64 y)
392*61046927SAndroid Build Coastguard Worker {
393*61046927SAndroid Build Coastguard Worker assert(x.type == MME_VALUE_TYPE_REG);
394*61046927SAndroid Build Coastguard Worker assert(y.lo.type == MME_VALUE_TYPE_REG);
395*61046927SAndroid Build Coastguard Worker assert(y.hi.type == MME_VALUE_TYPE_REG);
396*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
397*61046927SAndroid Build Coastguard Worker struct mme_value64 dst = mme_umul_32x32_64(b, x, y.lo);
398*61046927SAndroid Build Coastguard Worker struct mme_value tmp = mme_mul(b, x, y.hi);
399*61046927SAndroid Build Coastguard Worker mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
400*61046927SAndroid Build Coastguard Worker mme_free_reg(b, x);
401*61046927SAndroid Build Coastguard Worker mme_free_reg64(b, y);
402*61046927SAndroid Build Coastguard Worker return dst;
403*61046927SAndroid Build Coastguard Worker } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
404*61046927SAndroid Build Coastguard Worker struct mme_value64 dst = mme_alloc_reg64(b);
405*61046927SAndroid Build Coastguard Worker mme_fermi_umul_32x64_64_to_free_srcs(b, dst, x, y);
406*61046927SAndroid Build Coastguard Worker return dst;
407*61046927SAndroid Build Coastguard Worker } else {
408*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
409*61046927SAndroid Build Coastguard Worker }
410*61046927SAndroid Build Coastguard Worker }
411*61046927SAndroid Build Coastguard Worker
412*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_mul64(struct mme_builder * b,struct mme_value64 x,struct mme_value64 y)413*61046927SAndroid Build Coastguard Worker mme_mul64(struct mme_builder *b,
414*61046927SAndroid Build Coastguard Worker struct mme_value64 x, struct mme_value64 y)
415*61046927SAndroid Build Coastguard Worker {
416*61046927SAndroid Build Coastguard Worker if (mme_is_zero(x.hi) && mme_is_zero(y.hi))
417*61046927SAndroid Build Coastguard Worker return mme_umul_32x32_64(b, x.lo, y.lo);
418*61046927SAndroid Build Coastguard Worker
419*61046927SAndroid Build Coastguard Worker struct mme_value64 dst = mme_umul_32x32_64(b, x.lo, y.lo);
420*61046927SAndroid Build Coastguard Worker struct mme_value tmp = mme_alloc_reg(b);
421*61046927SAndroid Build Coastguard Worker
422*61046927SAndroid Build Coastguard Worker mme_mul_to(b, tmp, x.lo, y.hi);
423*61046927SAndroid Build Coastguard Worker mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
424*61046927SAndroid Build Coastguard Worker
425*61046927SAndroid Build Coastguard Worker mme_mul_to(b, tmp, x.hi, y.lo);
426*61046927SAndroid Build Coastguard Worker mme_add64_to(b, dst, dst, mme_value64(mme_zero(), tmp));
427*61046927SAndroid Build Coastguard Worker
428*61046927SAndroid Build Coastguard Worker mme_free_reg(b, tmp);
429*61046927SAndroid Build Coastguard Worker
430*61046927SAndroid Build Coastguard Worker return dst;
431*61046927SAndroid Build Coastguard Worker }
432*61046927SAndroid Build Coastguard Worker
433*61046927SAndroid Build Coastguard Worker static inline void
mme_bfe_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value pos,uint8_t bits)434*61046927SAndroid Build Coastguard Worker mme_bfe_to(struct mme_builder *b, struct mme_value dst,
435*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value pos, uint8_t bits)
436*61046927SAndroid Build Coastguard Worker {
437*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING) {
438*61046927SAndroid Build Coastguard Worker mme_srl_to(b, dst, x, pos);
439*61046927SAndroid Build Coastguard Worker mme_and_to(b, dst, dst, mme_imm(BITFIELD_MASK(bits)));
440*61046927SAndroid Build Coastguard Worker } else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) {
441*61046927SAndroid Build Coastguard Worker mme_fermi_bfe_to(b, dst, x, pos, bits);
442*61046927SAndroid Build Coastguard Worker } else {
443*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
444*61046927SAndroid Build Coastguard Worker }
445*61046927SAndroid Build Coastguard Worker }
446*61046927SAndroid Build Coastguard Worker
447*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_bfe(struct mme_builder * b,struct mme_value x,struct mme_value pos,uint8_t bits)448*61046927SAndroid Build Coastguard Worker mme_bfe(struct mme_builder *b,
449*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value pos, uint8_t bits)
450*61046927SAndroid Build Coastguard Worker {
451*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_alloc_reg(b);
452*61046927SAndroid Build Coastguard Worker mme_bfe_to(b, dst, x, pos, bits);
453*61046927SAndroid Build Coastguard Worker return dst;
454*61046927SAndroid Build Coastguard Worker }
455*61046927SAndroid Build Coastguard Worker
456*61046927SAndroid Build Coastguard Worker static inline void
mme_merge_to(struct mme_builder * b,struct mme_value dst,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)457*61046927SAndroid Build Coastguard Worker mme_merge_to(struct mme_builder *b, struct mme_value dst,
458*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y,
459*61046927SAndroid Build Coastguard Worker uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
460*61046927SAndroid Build Coastguard Worker {
461*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
462*61046927SAndroid Build Coastguard Worker mme_tu104_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
463*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
464*61046927SAndroid Build Coastguard Worker mme_fermi_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
465*61046927SAndroid Build Coastguard Worker else
466*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
467*61046927SAndroid Build Coastguard Worker }
468*61046927SAndroid Build Coastguard Worker
469*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_merge(struct mme_builder * b,struct mme_value x,struct mme_value y,uint16_t dst_pos,uint16_t bits,uint16_t src_pos)470*61046927SAndroid Build Coastguard Worker mme_merge(struct mme_builder *b,
471*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y,
472*61046927SAndroid Build Coastguard Worker uint16_t dst_pos, uint16_t bits, uint16_t src_pos)
473*61046927SAndroid Build Coastguard Worker {
474*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_alloc_reg(b);
475*61046927SAndroid Build Coastguard Worker mme_merge_to(b, dst, x, y, dst_pos, bits, src_pos);
476*61046927SAndroid Build Coastguard Worker return dst;
477*61046927SAndroid Build Coastguard Worker }
478*61046927SAndroid Build Coastguard Worker
/*
 * Merge val into x in place: x is both the destination and the first source
 * of mme_merge_to(), the destination position and width come from
 * DRF_LO(FIELD) and DRF_BITS(FIELD), and the source position is 0.
 * Note that x is expanded twice.
 */
#define mme_set_field(b, x, FIELD, val)                              \
   mme_merge_to((b), (x), (x), (val),                                \
                DRF_LO(FIELD), DRF_BITS(FIELD), 0)
481*61046927SAndroid Build Coastguard Worker
/*
 * Like mme_set_field(), but the value is the immediate FIELD_ENUM, formed by
 * pasting FIELD, an underscore and ENUM together and wrapping it in
 * mme_imm().
 *
 * The definition deliberately ends without a trailing backslash so that the
 * line following the macro can never be spliced into its body.
 */
#define mme_set_field_enum(b, x, FIELD, ENUM) \
   mme_set_field(b, x, FIELD, mme_imm(FIELD##_##ENUM))
484*61046927SAndroid Build Coastguard Worker
485*61046927SAndroid Build Coastguard Worker static inline void
mme_state_arr_to(struct mme_builder * b,struct mme_value dst,uint16_t state,struct mme_value index)486*61046927SAndroid Build Coastguard Worker mme_state_arr_to(struct mme_builder *b, struct mme_value dst,
487*61046927SAndroid Build Coastguard Worker uint16_t state, struct mme_value index)
488*61046927SAndroid Build Coastguard Worker {
489*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
490*61046927SAndroid Build Coastguard Worker mme_tu104_state_arr_to(b, dst, state, index);
491*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
492*61046927SAndroid Build Coastguard Worker mme_fermi_state_arr_to(b, dst, state, index);
493*61046927SAndroid Build Coastguard Worker else
494*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
495*61046927SAndroid Build Coastguard Worker }
496*61046927SAndroid Build Coastguard Worker
497*61046927SAndroid Build Coastguard Worker static inline void
mme_state_to(struct mme_builder * b,struct mme_value dst,uint16_t state)498*61046927SAndroid Build Coastguard Worker mme_state_to(struct mme_builder *b, struct mme_value dst,
499*61046927SAndroid Build Coastguard Worker uint16_t state)
500*61046927SAndroid Build Coastguard Worker {
501*61046927SAndroid Build Coastguard Worker mme_state_arr_to(b, dst, state, mme_zero());
502*61046927SAndroid Build Coastguard Worker }
503*61046927SAndroid Build Coastguard Worker
504*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_state_arr(struct mme_builder * b,uint16_t state,struct mme_value index)505*61046927SAndroid Build Coastguard Worker mme_state_arr(struct mme_builder *b,
506*61046927SAndroid Build Coastguard Worker uint16_t state, struct mme_value index)
507*61046927SAndroid Build Coastguard Worker {
508*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_alloc_reg(b);
509*61046927SAndroid Build Coastguard Worker mme_state_arr_to(b, dst, state, index);
510*61046927SAndroid Build Coastguard Worker return dst;
511*61046927SAndroid Build Coastguard Worker }
512*61046927SAndroid Build Coastguard Worker
513*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_state(struct mme_builder * b,uint16_t state)514*61046927SAndroid Build Coastguard Worker mme_state(struct mme_builder *b,
515*61046927SAndroid Build Coastguard Worker uint16_t state)
516*61046927SAndroid Build Coastguard Worker {
517*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_alloc_reg(b);
518*61046927SAndroid Build Coastguard Worker mme_state_to(b, dst, state);
519*61046927SAndroid Build Coastguard Worker return dst;
520*61046927SAndroid Build Coastguard Worker }
521*61046927SAndroid Build Coastguard Worker
522*61046927SAndroid Build Coastguard Worker static inline void
mme_dwrite(struct mme_builder * b,struct mme_value idx,struct mme_value val)523*61046927SAndroid Build Coastguard Worker mme_dwrite(struct mme_builder *b,
524*61046927SAndroid Build Coastguard Worker struct mme_value idx, struct mme_value val)
525*61046927SAndroid Build Coastguard Worker {
526*61046927SAndroid Build Coastguard Worker mme_alu_no_dst(b, MME_ALU_OP_DWRITE, idx, val);
527*61046927SAndroid Build Coastguard Worker }
528*61046927SAndroid Build Coastguard Worker
529*61046927SAndroid Build Coastguard Worker static inline void
mme_load_to(struct mme_builder * b,struct mme_value dst)530*61046927SAndroid Build Coastguard Worker mme_load_to(struct mme_builder *b, struct mme_value dst)
531*61046927SAndroid Build Coastguard Worker {
532*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
533*61046927SAndroid Build Coastguard Worker mme_tu104_load_to(b, dst);
534*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
535*61046927SAndroid Build Coastguard Worker mme_fermi_load_to(b, dst);
536*61046927SAndroid Build Coastguard Worker else
537*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
538*61046927SAndroid Build Coastguard Worker }
539*61046927SAndroid Build Coastguard Worker
540*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_tu104_load(struct mme_builder * b)541*61046927SAndroid Build Coastguard Worker mme_tu104_load(struct mme_builder *b)
542*61046927SAndroid Build Coastguard Worker {
543*61046927SAndroid Build Coastguard Worker struct mme_value dst = mme_alloc_reg(b);
544*61046927SAndroid Build Coastguard Worker mme_tu104_load_to(b, dst);
545*61046927SAndroid Build Coastguard Worker return dst;
546*61046927SAndroid Build Coastguard Worker }
547*61046927SAndroid Build Coastguard Worker
548*61046927SAndroid Build Coastguard Worker static inline struct mme_value
mme_load(struct mme_builder * b)549*61046927SAndroid Build Coastguard Worker mme_load(struct mme_builder *b)
550*61046927SAndroid Build Coastguard Worker {
551*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
552*61046927SAndroid Build Coastguard Worker return mme_tu104_load(b);
553*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
554*61046927SAndroid Build Coastguard Worker return mme_fermi_load(b);
555*61046927SAndroid Build Coastguard Worker else
556*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
557*61046927SAndroid Build Coastguard Worker }
558*61046927SAndroid Build Coastguard Worker
559*61046927SAndroid Build Coastguard Worker static inline struct mme_value64
mme_load_addr64(struct mme_builder * b)560*61046927SAndroid Build Coastguard Worker mme_load_addr64(struct mme_builder *b)
561*61046927SAndroid Build Coastguard Worker {
562*61046927SAndroid Build Coastguard Worker struct mme_value hi = mme_load(b);
563*61046927SAndroid Build Coastguard Worker struct mme_value lo = mme_load(b);
564*61046927SAndroid Build Coastguard Worker return mme_value64(lo, hi);
565*61046927SAndroid Build Coastguard Worker }
566*61046927SAndroid Build Coastguard Worker
567*61046927SAndroid Build Coastguard Worker static inline void
mme_mthd_arr(struct mme_builder * b,uint16_t mthd,struct mme_value index)568*61046927SAndroid Build Coastguard Worker mme_mthd_arr(struct mme_builder *b, uint16_t mthd,
569*61046927SAndroid Build Coastguard Worker struct mme_value index)
570*61046927SAndroid Build Coastguard Worker {
571*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
572*61046927SAndroid Build Coastguard Worker mme_tu104_mthd(b, mthd, index);
573*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
574*61046927SAndroid Build Coastguard Worker mme_fermi_mthd_arr(b, mthd, index);
575*61046927SAndroid Build Coastguard Worker else
576*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
577*61046927SAndroid Build Coastguard Worker }
578*61046927SAndroid Build Coastguard Worker
579*61046927SAndroid Build Coastguard Worker static inline void
mme_mthd(struct mme_builder * b,uint16_t mthd)580*61046927SAndroid Build Coastguard Worker mme_mthd(struct mme_builder *b, uint16_t mthd)
581*61046927SAndroid Build Coastguard Worker {
582*61046927SAndroid Build Coastguard Worker mme_mthd_arr(b, mthd, mme_zero());
583*61046927SAndroid Build Coastguard Worker }
584*61046927SAndroid Build Coastguard Worker
585*61046927SAndroid Build Coastguard Worker static inline void
mme_emit(struct mme_builder * b,struct mme_value data)586*61046927SAndroid Build Coastguard Worker mme_emit(struct mme_builder *b,
587*61046927SAndroid Build Coastguard Worker struct mme_value data)
588*61046927SAndroid Build Coastguard Worker {
589*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
590*61046927SAndroid Build Coastguard Worker mme_tu104_emit(b, data);
591*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
592*61046927SAndroid Build Coastguard Worker mme_fermi_emit(b, data);
593*61046927SAndroid Build Coastguard Worker else
594*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
595*61046927SAndroid Build Coastguard Worker }
596*61046927SAndroid Build Coastguard Worker
597*61046927SAndroid Build Coastguard Worker static inline void
mme_emit_addr64(struct mme_builder * b,struct mme_value64 addr)598*61046927SAndroid Build Coastguard Worker mme_emit_addr64(struct mme_builder *b, struct mme_value64 addr)
599*61046927SAndroid Build Coastguard Worker {
600*61046927SAndroid Build Coastguard Worker mme_emit(b, addr.hi);
601*61046927SAndroid Build Coastguard Worker mme_emit(b, addr.lo);
602*61046927SAndroid Build Coastguard Worker }
603*61046927SAndroid Build Coastguard Worker
604*61046927SAndroid Build Coastguard Worker static inline void
mme_tu104_read_fifoed(struct mme_builder * b,struct mme_value64 addr,struct mme_value count)605*61046927SAndroid Build Coastguard Worker mme_tu104_read_fifoed(struct mme_builder *b,
606*61046927SAndroid Build Coastguard Worker struct mme_value64 addr,
607*61046927SAndroid Build Coastguard Worker struct mme_value count)
608*61046927SAndroid Build Coastguard Worker {
609*61046927SAndroid Build Coastguard Worker mme_mthd(b, 0x0550 /* NVC597_SET_MME_MEM_ADDRESS_A */);
610*61046927SAndroid Build Coastguard Worker mme_emit_addr64(b, addr);
611*61046927SAndroid Build Coastguard Worker
612*61046927SAndroid Build Coastguard Worker mme_mthd(b, 0x0560 /* NVC597_MME_DMA_READ_FIFOED */);
613*61046927SAndroid Build Coastguard Worker mme_emit(b, count);
614*61046927SAndroid Build Coastguard Worker
615*61046927SAndroid Build Coastguard Worker mme_tu104_load_barrier(b);
616*61046927SAndroid Build Coastguard Worker }
617*61046927SAndroid Build Coastguard Worker
618*61046927SAndroid Build Coastguard Worker static inline void
mme_start_loop(struct mme_builder * b,struct mme_value count)619*61046927SAndroid Build Coastguard Worker mme_start_loop(struct mme_builder *b, struct mme_value count)
620*61046927SAndroid Build Coastguard Worker {
621*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
622*61046927SAndroid Build Coastguard Worker mme_tu104_start_loop(b, count);
623*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
624*61046927SAndroid Build Coastguard Worker mme_fermi_start_loop(b, count);
625*61046927SAndroid Build Coastguard Worker else
626*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
627*61046927SAndroid Build Coastguard Worker }
628*61046927SAndroid Build Coastguard Worker
629*61046927SAndroid Build Coastguard Worker static inline void
mme_end_loop(struct mme_builder * b)630*61046927SAndroid Build Coastguard Worker mme_end_loop(struct mme_builder *b)
631*61046927SAndroid Build Coastguard Worker {
632*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
633*61046927SAndroid Build Coastguard Worker mme_tu104_end_loop(b);
634*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
635*61046927SAndroid Build Coastguard Worker mme_fermi_end_loop(b);
636*61046927SAndroid Build Coastguard Worker else
637*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
638*61046927SAndroid Build Coastguard Worker }
639*61046927SAndroid Build Coastguard Worker
/*
 * Scoped loop helper, used as: mme_loop(b, count) { ...body... }
 *
 * Expands to a C for-statement whose body executes exactly once:
 * mme_start_loop() is called in the initializer and mme_end_loop() in the
 * increment clause.  Leaving the body early (break, return, goto) therefore
 * skips mme_end_loop().
 */
#define mme_loop(b, count)                                         \
   for (bool run = (mme_start_loop((b), (count)), true); run;      \
        run = false, mme_end_loop((b)))
643*61046927SAndroid Build Coastguard Worker
644*61046927SAndroid Build Coastguard Worker #define MME_DEF_START_IF(op, OP, if_true) \
645*61046927SAndroid Build Coastguard Worker static inline void \
646*61046927SAndroid Build Coastguard Worker mme_start_if_##op(struct mme_builder *b, \
647*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y) \
648*61046927SAndroid Build Coastguard Worker { \
649*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING) \
650*61046927SAndroid Build Coastguard Worker mme_tu104_start_if(b, MME_CMP_OP_##OP, if_true, x, y); \
651*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) \
652*61046927SAndroid Build Coastguard Worker mme_fermi_start_if(b, MME_CMP_OP_##OP, if_true, x, y); \
653*61046927SAndroid Build Coastguard Worker else \
654*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class"); \
655*61046927SAndroid Build Coastguard Worker }
656*61046927SAndroid Build Coastguard Worker
MME_DEF_START_IF(ilt,LT,true)657*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ilt, LT, true)
658*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ult, LTU, true)
659*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ile, LE, true)
660*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ule, LEU, true)
661*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ieq, EQ, true)
662*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ige, LT, false)
663*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(uge, LTU, false)
664*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(igt, LE, false)
665*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ugt, LEU, false)
666*61046927SAndroid Build Coastguard Worker MME_DEF_START_IF(ine, EQ, false)
667*61046927SAndroid Build Coastguard Worker
668*61046927SAndroid Build Coastguard Worker #undef MME_DEF_START_IF
669*61046927SAndroid Build Coastguard Worker
670*61046927SAndroid Build Coastguard Worker static inline void
671*61046927SAndroid Build Coastguard Worker mme_end_if(struct mme_builder *b)
672*61046927SAndroid Build Coastguard Worker {
673*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
674*61046927SAndroid Build Coastguard Worker mme_tu104_end_if(b);
675*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
676*61046927SAndroid Build Coastguard Worker mme_fermi_end_if(b);
677*61046927SAndroid Build Coastguard Worker else
678*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
679*61046927SAndroid Build Coastguard Worker }
680*61046927SAndroid Build Coastguard Worker
/*
 * Scoped conditional helper, used as: mme_if(b, ieq, x, y) { ...body... }
 *
 * cmp is the suffix of one of the mme_start_if_<op>() functions.  Expands to
 * a C for-statement whose body executes exactly once: the start function is
 * called in the initializer and mme_end_if() in the increment clause.
 * Leaving the body early (break, return, goto) therefore skips mme_end_if().
 */
#define mme_if(b, cmp, x, y)                                       \
   for (bool run = (mme_start_if_##cmp((b), (x), (y)), true); run; \
        run = false, mme_end_if((b)))
684*61046927SAndroid Build Coastguard Worker
685*61046927SAndroid Build Coastguard Worker static inline void
mme_start_while(struct mme_builder * b)686*61046927SAndroid Build Coastguard Worker mme_start_while(struct mme_builder *b)
687*61046927SAndroid Build Coastguard Worker {
688*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING)
689*61046927SAndroid Build Coastguard Worker mme_tu104_start_while(b);
690*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI)
691*61046927SAndroid Build Coastguard Worker mme_fermi_start_while(b);
692*61046927SAndroid Build Coastguard Worker else
693*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class");
694*61046927SAndroid Build Coastguard Worker }
695*61046927SAndroid Build Coastguard Worker
696*61046927SAndroid Build Coastguard Worker #define MME_DEF_END_WHILE(op, OP, if_true) \
697*61046927SAndroid Build Coastguard Worker static inline void \
698*61046927SAndroid Build Coastguard Worker mme_end_while_##op(struct mme_builder *b, \
699*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y) \
700*61046927SAndroid Build Coastguard Worker { \
701*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING) \
702*61046927SAndroid Build Coastguard Worker mme_tu104_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
703*61046927SAndroid Build Coastguard Worker else if (b->devinfo->cls_eng3d >= MME_CLS_FERMI) \
704*61046927SAndroid Build Coastguard Worker mme_fermi_end_while(b, MME_CMP_OP_##OP, if_true, x, y); \
705*61046927SAndroid Build Coastguard Worker else \
706*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class"); \
707*61046927SAndroid Build Coastguard Worker }
708*61046927SAndroid Build Coastguard Worker
MME_DEF_END_WHILE(ilt,LT,true)709*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ilt, LT, true)
710*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ult, LTU, true)
711*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ile, LE, true)
712*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ule, LEU, true)
713*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ieq, EQ, true)
714*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ige, LT, false)
715*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(uge, LTU, false)
716*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(igt, LE, false)
717*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ugt, LEU, false)
718*61046927SAndroid Build Coastguard Worker MME_DEF_END_WHILE(ine, EQ, false)
719*61046927SAndroid Build Coastguard Worker
/*
 * Scoped while-loop helper, used as: mme_while(b, ine, x, y) { ...body... }
 *
 * cmp is the suffix of one of the mme_end_while_<op>() functions.  Expands
 * to a C for-statement whose body executes exactly once: mme_start_while()
 * is called in the initializer, and the end function, which carries the
 * comparison of x with y, is called in the increment clause.  x and y are
 * therefore evaluated after the body.  Leaving the body early (break,
 * return, goto) skips the end function.
 */
#define mme_while(b, cmp, x, y)                                    \
   for (bool run = (mme_start_while((b)), true); run;              \
        run = false, mme_end_while_##cmp((b), (x), (y)))
723*61046927SAndroid Build Coastguard Worker
724*61046927SAndroid Build Coastguard Worker #define MME_DEF_EXIT(op, OP, if_true) \
725*61046927SAndroid Build Coastguard Worker static inline void \
726*61046927SAndroid Build Coastguard Worker mme_exit_if_##op(struct mme_builder *b, \
727*61046927SAndroid Build Coastguard Worker struct mme_value x, struct mme_value y) \
728*61046927SAndroid Build Coastguard Worker { \
729*61046927SAndroid Build Coastguard Worker if (b->devinfo->cls_eng3d >= MME_CLS_TURING) \
730*61046927SAndroid Build Coastguard Worker mme_tu104_exit_if(b, MME_CMP_OP_##OP, if_true, x, y); \
731*61046927SAndroid Build Coastguard Worker else \
732*61046927SAndroid Build Coastguard Worker unreachable("Unsupported GPU class"); \
733*61046927SAndroid Build Coastguard Worker }
734*61046927SAndroid Build Coastguard Worker
735*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ilt, LT, true)
736*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ult, LTU, true)
737*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ile, LE, true)
738*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ule, LEU, true)
739*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ieq, EQ, true)
740*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ige, LT, false)
741*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(uge, LTU, false)
742*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(igt, LE, false)
743*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ugt, LEU, false)
744*61046927SAndroid Build Coastguard Worker MME_DEF_EXIT(ine, EQ, false)
745*61046927SAndroid Build Coastguard Worker
746*61046927SAndroid Build Coastguard Worker #undef MME_DEF_EXIT
747*61046927SAndroid Build Coastguard Worker
/*
 * Spelling helper: mme_exit_if(b, cmp, x, y) calls mme_exit_if_<cmp>(b, x, y),
 * where cmp is one of the suffixes generated above (ilt, ult, ile, ...).
 */
#define mme_exit_if(b, cmp, x, y)                                  \
   mme_exit_if_##cmp((b), (x), (y))
750*61046927SAndroid Build Coastguard Worker
751*61046927SAndroid Build Coastguard Worker static inline void
752*61046927SAndroid Build Coastguard Worker mme_exit(struct mme_builder *b)
753*61046927SAndroid Build Coastguard Worker {
754*61046927SAndroid Build Coastguard Worker mme_exit_if_ieq(b, mme_zero(), mme_zero());
755*61046927SAndroid Build Coastguard Worker }
756*61046927SAndroid Build Coastguard Worker
757*61046927SAndroid Build Coastguard Worker #ifdef __cplusplus
758*61046927SAndroid Build Coastguard Worker }
759*61046927SAndroid Build Coastguard Worker #endif
760*61046927SAndroid Build Coastguard Worker
761*61046927SAndroid Build Coastguard Worker #endif /* MME_BUILDER_H */
762*61046927SAndroid Build Coastguard Worker
763