xref: /aosp_15_r20/external/mesa3d/src/nouveau/codegen/nv50_ir_target_gm107.cpp (revision 6104692788411f58d303aa86923a9ff6ecaded22)
1 /*
2  * Copyright 2011 Christoph Bumiller
3  *           2014 Red Hat Inc.
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
19  * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
20  * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
21  * OTHER DEALINGS IN THE SOFTWARE.
22  */
23 
24 #include "nv50_ir_target_gm107.h"
25 #include "nv50_ir_lowering_gm107.h"
26 
27 namespace nv50_ir {
28 
getTargetGM107(unsigned int chipset)29 Target *getTargetGM107(unsigned int chipset)
30 {
31    return new TargetGM107(chipset);
32 }
33 
// BUILTINS / LIBRARY FUNCTIONS:
35 
// laziness -> will just hardcode everything for the time being
37 
38 #include "lib/gm107.asm.h"
39 
40 void
getBuiltinCode(const uint32_t ** code,uint32_t * size) const41 TargetGM107::getBuiltinCode(const uint32_t **code, uint32_t *size) const
42 {
43    *code = (const uint32_t *)&gm107_builtin_code[0];
44    *size = sizeof(gm107_builtin_code);
45 }
46 
47 uint32_t
getBuiltinOffset(int builtin) const48 TargetGM107::getBuiltinOffset(int builtin) const
49 {
50    assert(builtin < NVC0_BUILTIN_COUNT);
51    return gm107_builtin_offsets[builtin];
52 }
53 
54 bool
isOpSupported(operation op,DataType ty) const55 TargetGM107::isOpSupported(operation op, DataType ty) const
56 {
57    switch (op) {
58    case OP_SAD:
59    case OP_DIV:
60    case OP_MOD:
61       return false;
62    case OP_SQRT:
63       if (ty == TYPE_F64)
64          return false;
65       return chipset >= NVISA_GM200_CHIPSET;
66    case OP_XMAD:
67       if (isFloatType(ty))
68          return false;
69       break;
70    default:
71       break;
72    }
73 
74    return true;
75 }
76 
77 // Return true when an instruction supports the reuse flag. When supported, the
78 // hardware will use the operand reuse cache introduced since Maxwell, which
79 // should try to reduce bank conflicts by caching values for the subsequent
80 // instructions. Note that the next instructions have to use the same GPR id in
81 // the same operand slot.
82 bool
isReuseSupported(const Instruction * insn) const83 TargetGM107::isReuseSupported(const Instruction *insn) const
84 {
85    const OpClass cl = getOpClass(insn->op);
86 
87    // TODO: double-check!
88    switch (cl) {
89    case OPCLASS_ARITH:
90    case OPCLASS_COMPARE:
91    case OPCLASS_LOGIC:
92    case OPCLASS_MOVE:
93    case OPCLASS_SHIFT:
94       return true;
95    case OPCLASS_BITFIELD:
96       if (insn->op == OP_INSBF || insn->op == OP_EXTBF)
97          return true;
98       break;
99    default:
100       break;
101    }
102    return false;
103 }
104 
105 // Return true when an instruction requires to set up a barrier because it
106 // doesn't operate at a fixed latency. Variable latency instructions are memory
107 // operations, double precision operations, special function unit operations
108 // and other low throughput instructions.
109 bool
isBarrierRequired(const Instruction * insn) const110 TargetGM107::isBarrierRequired(const Instruction *insn) const
111 {
112    const OpClass cl = getOpClass(insn->op);
113 
114    if (insn->dType == TYPE_F64 || insn->sType == TYPE_F64)
115       return true;
116 
117    switch (cl) {
118    case OPCLASS_ATOMIC:
119    case OPCLASS_LOAD:
120    case OPCLASS_STORE:
121    case OPCLASS_SURFACE:
122    case OPCLASS_TEXTURE:
123       return true;
124    case OPCLASS_SFU:
125       switch (insn->op) {
126       case OP_COS:
127       case OP_EX2:
128       case OP_LG2:
129       case OP_LINTERP:
130       case OP_PINTERP:
131       case OP_RCP:
132       case OP_RSQ:
133       case OP_SIN:
134       case OP_SQRT:
135          return true;
136       default:
137          break;
138       }
139       break;
140    case OPCLASS_BITFIELD:
141       switch (insn->op) {
142       case OP_BFIND:
143       case OP_POPCNT:
144          return true;
145       default:
146          break;
147       }
148       break;
149    case OPCLASS_CONTROL:
150       switch (insn->op) {
151       case OP_EMIT:
152       case OP_RESTART:
153          return true;
154       default:
155          break;
156       }
157       break;
158    case OPCLASS_OTHER:
159       switch (insn->op) {
160       case OP_AFETCH:
161       case OP_PFETCH:
162       case OP_PIXLD:
163       case OP_SHFL:
164          return true;
165       case OP_RDSV:
166          return !isCS2RSV(insn->getSrc(0)->reg.data.sv.sv);
167       default:
168          break;
169       }
170       break;
171    case OPCLASS_ARITH:
172       if ((insn->op == OP_MUL || insn->op == OP_MAD) &&
173           !isFloatType(insn->dType))
174          return true;
175       break;
176    case OPCLASS_CONVERT:
177       if (insn->def(0).getFile() != FILE_PREDICATE &&
178           insn->src(0).getFile() != FILE_PREDICATE)
179          return true;
180       break;
181    default:
182       break;
183    }
184    return false;
185 }
186 
187 bool
canDualIssue(const Instruction * a,const Instruction * b) const188 TargetGM107::canDualIssue(const Instruction *a, const Instruction *b) const
189 {
190    // TODO
191    return false;
192 }
193 
194 // Return the number of stall counts needed to complete a single instruction.
195 // On Maxwell GPUs, the pipeline depth is 6, but some instructions require
196 // different number of stall counts like memory operations.
197 int
getLatency(const Instruction * insn) const198 TargetGM107::getLatency(const Instruction *insn) const
199 {
200    // TODO: better values! This should be good enough for now though.
201    switch (insn->op) {
202    case OP_EMIT:
203    case OP_EXPORT:
204    case OP_PIXLD:
205    case OP_RESTART:
206    case OP_STORE:
207    case OP_SUSTB:
208    case OP_SUSTP:
209       return 1;
210    case OP_SHFL:
211       return 2;
212    case OP_ADD:
213    case OP_AND:
214    case OP_EXTBF:
215    case OP_FMA:
216    case OP_INSBF:
217    case OP_MAD:
218    case OP_MAX:
219    case OP_MIN:
220    case OP_MOV:
221    case OP_MUL:
222    case OP_NOT:
223    case OP_OR:
224    case OP_PREEX2:
225    case OP_PRESIN:
226    case OP_QUADOP:
227    case OP_SELP:
228    case OP_SET:
229    case OP_SET_AND:
230    case OP_SET_OR:
231    case OP_SET_XOR:
232    case OP_SHL:
233    case OP_SHLADD:
234    case OP_SHR:
235    case OP_SLCT:
236    case OP_SUB:
237    case OP_VOTE:
238    case OP_XOR:
239    case OP_XMAD:
240       if (insn->dType != TYPE_F64)
241          return 6;
242       break;
243    case OP_RDSV:
244       return isCS2RSV(insn->getSrc(0)->reg.data.sv.sv) ? 6 : 15;
245    case OP_ABS:
246    case OP_CEIL:
247    case OP_CVT:
248    case OP_FLOOR:
249    case OP_NEG:
250    case OP_SAT:
251    case OP_TRUNC:
252       if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
253                                  insn->src(0).getFile() == FILE_PREDICATE))
254          return 6;
255       break;
256    case OP_BFIND:
257    case OP_COS:
258    case OP_EX2:
259    case OP_LG2:
260    case OP_POPCNT:
261    case OP_QUADON:
262    case OP_QUADPOP:
263    case OP_RCP:
264    case OP_RSQ:
265    case OP_SIN:
266    case OP_SQRT:
267       return 13;
268    default:
269       break;
270    }
271    // Use the maximum number of stall counts for other instructions.
272    return 15;
273 }
274 
275 // Return the operand read latency which is the number of stall counts before
276 // an instruction can read its sources. For memory operations like ATOM, LOAD
277 // and STORE, the memory access has to be indirect.
278 int
getReadLatency(const Instruction * insn) const279 TargetGM107::getReadLatency(const Instruction *insn) const
280 {
281    switch (insn->op) {
282    case OP_ABS:
283    case OP_BFIND:
284    case OP_CEIL:
285    case OP_COS:
286    case OP_EX2:
287    case OP_FLOOR:
288    case OP_LG2:
289    case OP_NEG:
290    case OP_POPCNT:
291    case OP_RCP:
292    case OP_RSQ:
293    case OP_SAT:
294    case OP_SIN:
295    case OP_SQRT:
296    case OP_SULDB:
297    case OP_SULDP:
298    case OP_SUREDB:
299    case OP_SUREDP:
300    case OP_SUSTB:
301    case OP_SUSTP:
302    case OP_TRUNC:
303       return 4;
304    case OP_CVT:
305       if (insn->def(0).getFile() != FILE_PREDICATE &&
306           insn->src(0).getFile() != FILE_PREDICATE)
307          return 4;
308       break;
309    case OP_ATOM:
310    case OP_LOAD:
311    case OP_STORE:
312       if (insn->src(0).isIndirect(0)) {
313          switch (insn->src(0).getFile()) {
314          case FILE_MEMORY_SHARED:
315          case FILE_MEMORY_CONST:
316             return 2;
317          case FILE_MEMORY_GLOBAL:
318          case FILE_MEMORY_LOCAL:
319             return 4;
320          default:
321             break;
322          }
323       }
324       break;
325    case OP_EXPORT:
326    case OP_PFETCH:
327    case OP_SHFL:
328    case OP_VFETCH:
329       return 2;
330    default:
331       break;
332    }
333    return 0;
334 }
335 
336 bool
isCS2RSV(SVSemantic sv) const337 TargetGM107::isCS2RSV(SVSemantic sv) const
338 {
339    return sv == SV_CLOCK;
340 }
341 
342 bool
runLegalizePass(Program * prog,CGStage stage) const343 TargetGM107::runLegalizePass(Program *prog, CGStage stage) const
344 {
345    if (stage == CG_STAGE_PRE_SSA) {
346       GM107LoweringPass pass(prog);
347       return pass.run(prog, false, true);
348    } else
349    if (stage == CG_STAGE_POST_RA) {
350       NVC0LegalizePostRA pass(prog);
351       return pass.run(prog, false, true);
352    } else
353    if (stage == CG_STAGE_SSA) {
354       GM107LegalizeSSA pass;
355       return pass.run(prog, false, true);
356    }
357    return false;
358 }
359 
360 CodeEmitter *
getCodeEmitter(Program::Type type)361 TargetGM107::getCodeEmitter(Program::Type type)
362 {
363    return createCodeEmitterGM107(type);
364 }
365 
366 } // namespace nv50_ir
367