1 /*
2 * Copyright 2014 Red Hat Inc.
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice shall be included in
12 * all copies or substantial portions of the Software.
13 *
14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
17 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
18 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
19 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
20 * OTHER DEALINGS IN THE SOFTWARE.
21 *
 * Authors: Ben Skeggs <bskeggs@redhat.com>
23 */
24
25 #include "nv50_ir_target_gm107.h"
26 #include "nv50_ir_sched_gm107.h"
27
28 //#define GM107_DEBUG_SCHED_DATA
29
30 namespace nv50_ir {
31
32 class CodeEmitterGM107 : public CodeEmitter
33 {
34 public:
35 CodeEmitterGM107(const TargetGM107 *);
36
37 virtual bool emitInstruction(Instruction *);
38 virtual uint32_t getMinEncodingSize(const Instruction *) const;
39
40 virtual void prepareEmission(Program *);
41 virtual void prepareEmission(Function *);
42
setProgramType(Program::Type pType)43 inline void setProgramType(Program::Type pType) { progType = pType; }
44
45 private:
46 const TargetGM107 *targGM107;
47
48 Program::Type progType;
49
50 const Instruction *insn;
51 const bool writeIssueDelays;
52 uint32_t *data;
53
54 private:
55 inline void emitField(uint32_t *, int, int, uint32_t);
emitField(int b,int s,uint32_t v)56 inline void emitField(int b, int s, uint32_t v) { emitField(code, b, s, v); }
57
58 inline void emitInsn(uint32_t, bool);
emitInsn(uint32_t o)59 inline void emitInsn(uint32_t o) { emitInsn(o, true); }
60 inline void emitPred();
61 inline void emitGPR(int, const Value *);
emitGPR(int pos)62 inline void emitGPR(int pos) {
63 emitGPR(pos, (const Value *)NULL);
64 }
emitGPR(int pos,const ValueRef & ref)65 inline void emitGPR(int pos, const ValueRef &ref) {
66 emitGPR(pos, ref.get() ? ref.rep() : (const Value *)NULL);
67 }
emitGPR(int pos,const ValueRef * ref)68 inline void emitGPR(int pos, const ValueRef *ref) {
69 emitGPR(pos, ref ? ref->rep() : (const Value *)NULL);
70 }
emitGPR(int pos,const ValueDef & def)71 inline void emitGPR(int pos, const ValueDef &def) {
72 emitGPR(pos, def.get() ? def.rep() : (const Value *)NULL);
73 }
74 inline void emitSYS(int, const Value *);
emitSYS(int pos,const ValueRef & ref)75 inline void emitSYS(int pos, const ValueRef &ref) {
76 emitSYS(pos, ref.get() ? ref.rep() : (const Value *)NULL);
77 }
78 inline void emitPRED(int, const Value *);
emitPRED(int pos)79 inline void emitPRED(int pos) {
80 emitPRED(pos, (const Value *)NULL);
81 }
emitPRED(int pos,const ValueRef & ref)82 inline void emitPRED(int pos, const ValueRef &ref) {
83 emitPRED(pos, ref.get() ? ref.rep() : (const Value *)NULL);
84 }
emitPRED(int pos,const ValueDef & def)85 inline void emitPRED(int pos, const ValueDef &def) {
86 emitPRED(pos, def.get() ? def.rep() : (const Value *)NULL);
87 }
88 inline void emitADDR(int, int, int, int, const ValueRef &);
89 inline void emitCBUF(int, int, int, int, int, const ValueRef &);
90 inline bool longIMMD(const ValueRef &);
91 inline void emitIMMD(int, int, const ValueRef &);
92
93 void emitCond3(int, CondCode);
94 void emitCond4(int, CondCode);
emitCond5(int pos,CondCode cc)95 void emitCond5(int pos, CondCode cc) { emitCond4(pos, cc); }
96 inline void emitO(int);
97 inline void emitP(int);
98 inline void emitSAT(int);
99 inline void emitCC(int);
100 inline void emitX(int);
101 inline void emitABS(int, const ValueRef &);
102 inline void emitNEG(int, const ValueRef &);
103 inline void emitNEG2(int, const ValueRef &, const ValueRef &);
104 inline void emitFMZ(int, int);
105 inline void emitRND(int, RoundMode, int);
emitRND(int pos)106 inline void emitRND(int pos) {
107 emitRND(pos, insn->rnd, -1);
108 }
109 inline void emitPDIV(int);
110 inline void emitINV(int, const ValueRef &);
111
112 void emitEXIT();
113 void emitBRA();
114 void emitCAL();
115 void emitPCNT();
116 void emitCONT();
117 void emitPBK();
118 void emitBRK();
119 void emitPRET();
120 void emitRET();
121 void emitSSY();
122 void emitSYNC();
123 void emitSAM();
124 void emitRAM();
125
126 void emitPSETP();
127
128 void emitMOV();
129 void emitS2R();
130 void emitCS2R();
131 void emitF2F();
132 void emitF2I();
133 void emitI2F();
134 void emitI2I();
135 void emitSEL();
136 void emitSHFL();
137
138 void emitDADD();
139 void emitDMUL();
140 void emitDFMA();
141 void emitDMNMX();
142 void emitDSET();
143 void emitDSETP();
144
145 void emitFADD();
146 void emitFMUL();
147 void emitFFMA();
148 void emitMUFU();
149 void emitFMNMX();
150 void emitRRO();
151 void emitFCMP();
152 void emitFSET();
153 void emitFSETP();
154 void emitFSWZADD();
155
156 void emitLOP();
157 void emitNOT();
158 void emitIADD();
159 void emitIMUL();
160 void emitIMAD();
161 void emitISCADD();
162 void emitXMAD();
163 void emitIMNMX();
164 void emitICMP();
165 void emitISET();
166 void emitISETP();
167 void emitSHL();
168 void emitSHR();
169 void emitSHF();
170 void emitPOPC();
171 void emitBFI();
172 void emitBFE();
173 void emitFLO();
174 void emitPRMT();
175
176 void emitLDSTs(int, DataType);
177 void emitLDSTc(int);
178 void emitLDC();
179 void emitLDL();
180 void emitLDS();
181 void emitLD();
182 void emitSTL();
183 void emitSTS();
184 void emitST();
185 void emitALD();
186 void emitAST();
187 void emitISBERD();
188 void emitAL2P();
189 void emitIPA();
190 void emitATOM();
191 void emitATOMS();
192 void emitRED();
193 void emitCCTL();
194
195 void emitPIXLD();
196
197 void emitTEXs(int);
198 void emitTEX();
199 void emitTEXS();
200 void emitTLD();
201 void emitTLD4();
202 void emitTXD();
203 void emitTXQ();
204 void emitTMML();
205 void emitDEPBAR();
206
207 void emitNOP();
208 void emitKIL();
209 void emitOUT();
210
211 void emitBAR();
212 void emitMEMBAR();
213
214 void emitVOTE();
215
216 void emitSUTarget();
217 void emitSUHandle(const int s);
218 void emitSUSTx();
219 void emitSULDx();
220 void emitSUREDx();
221 };
222
223 /*******************************************************************************
224 * general instruction layout/fields
225 ******************************************************************************/
226
227 void
emitField(uint32_t * data,int b,int s,uint32_t v)228 CodeEmitterGM107::emitField(uint32_t *data, int b, int s, uint32_t v)
229 {
230 if (b >= 0) {
231 uint32_t m = ((1ULL << s) - 1);
232 uint64_t d = (uint64_t)(v & m) << b;
233 assert(!(v & ~m) || (v & ~m) == ~m);
234 data[1] |= d >> 32;
235 data[0] |= d;
236 }
237 }
238
239 void
emitPred()240 CodeEmitterGM107::emitPred()
241 {
242 if (insn->predSrc >= 0) {
243 emitField(16, 3, insn->getSrc(insn->predSrc)->rep()->reg.data.id);
244 emitField(19, 1, insn->cc == CC_NOT_P);
245 } else {
246 emitField(16, 3, 7);
247 }
248 }
249
250 void
emitInsn(uint32_t hi,bool pred)251 CodeEmitterGM107::emitInsn(uint32_t hi, bool pred)
252 {
253 code[0] = 0x00000000;
254 code[1] = hi;
255 if (pred)
256 emitPred();
257 }
258
259 void
emitGPR(int pos,const Value * val)260 CodeEmitterGM107::emitGPR(int pos, const Value *val)
261 {
262 emitField(pos, 8, val && !val->inFile(FILE_FLAGS) ?
263 val->reg.data.id : 255);
264 }
265
266 void
emitSYS(int pos,const Value * val)267 CodeEmitterGM107::emitSYS(int pos, const Value *val)
268 {
269 int id = val ? val->reg.data.id : -1;
270
271 switch (id) {
272 case SV_LANEID : id = 0x00; break;
273 case SV_VERTEX_COUNT : id = 0x10; break;
274 case SV_INVOCATION_ID : id = 0x11; break;
275 case SV_THREAD_KILL : id = 0x13; break;
276 case SV_INVOCATION_INFO: id = 0x1d; break;
277 case SV_COMBINED_TID : id = 0x20; break;
278 case SV_TID : id = 0x21 + val->reg.data.sv.index; break;
279 case SV_CTAID : id = 0x25 + val->reg.data.sv.index; break;
280 case SV_LANEMASK_EQ : id = 0x38; break;
281 case SV_LANEMASK_LT : id = 0x39; break;
282 case SV_LANEMASK_LE : id = 0x3a; break;
283 case SV_LANEMASK_GT : id = 0x3b; break;
284 case SV_LANEMASK_GE : id = 0x3c; break;
285 case SV_CLOCK : id = 0x50 + val->reg.data.sv.index; break;
286 default:
287 assert(!"invalid system value");
288 id = 0;
289 break;
290 }
291
292 emitField(pos, 8, id);
293 }
294
295 void
emitPRED(int pos,const Value * val)296 CodeEmitterGM107::emitPRED(int pos, const Value *val)
297 {
298 emitField(pos, 3, val ? val->reg.data.id : 7);
299 }
300
301 void
emitADDR(int gpr,int off,int len,int shr,const ValueRef & ref)302 CodeEmitterGM107::emitADDR(int gpr, int off, int len, int shr,
303 const ValueRef &ref)
304 {
305 const Value *v = ref.get();
306 assert(!(v->reg.data.offset & ((1 << shr) - 1)));
307 if (gpr >= 0)
308 emitGPR(gpr, ref.getIndirect(0));
309 emitField(off, len, v->reg.data.offset >> shr);
310 }
311
312 void
emitCBUF(int buf,int gpr,int off,int len,int shr,const ValueRef & ref)313 CodeEmitterGM107::emitCBUF(int buf, int gpr, int off, int len, int shr,
314 const ValueRef &ref)
315 {
316 const Value *v = ref.get();
317 const Symbol *s = v->asSym();
318
319 assert(!(s->reg.data.offset & ((1 << shr) - 1)));
320
321 emitField(buf, 5, v->reg.fileIndex);
322 if (gpr >= 0)
323 emitGPR(gpr, ref.getIndirect(0));
324 emitField(off, 16, s->reg.data.offset >> shr);
325 }
326
327 bool
longIMMD(const ValueRef & ref)328 CodeEmitterGM107::longIMMD(const ValueRef &ref)
329 {
330 if (ref.getFile() == FILE_IMMEDIATE) {
331 const ImmediateValue *imm = ref.get()->asImm();
332 if (isFloatType(insn->sType))
333 return imm->reg.data.u32 & 0xfff;
334 else
335 return imm->reg.data.s32 > 0x7ffff || imm->reg.data.s32 < -0x80000;
336 }
337 return false;
338 }
339
340 void
emitIMMD(int pos,int len,const ValueRef & ref)341 CodeEmitterGM107::emitIMMD(int pos, int len, const ValueRef &ref)
342 {
343 const ImmediateValue *imm = ref.get()->asImm();
344 uint32_t val = imm->reg.data.u32;
345
346 if (len == 19) {
347 if (insn->sType == TYPE_F32 || insn->sType == TYPE_F16) {
348 assert(!(val & 0x00000fff));
349 val >>= 12;
350 } else if (insn->sType == TYPE_F64) {
351 assert(!(imm->reg.data.u64 & 0x00000fffffffffffULL));
352 val = imm->reg.data.u64 >> 44;
353 } else {
354 assert(!(val & 0xfff80000) || (val & 0xfff80000) == 0xfff80000);
355 }
356 emitField( 56, 1, (val & 0x80000) >> 19);
357 emitField(pos, len, (val & 0x7ffff));
358 } else {
359 emitField(pos, len, val);
360 }
361 }
362
363 /*******************************************************************************
364 * modifiers
365 ******************************************************************************/
366
367 void
emitCond3(int pos,CondCode code)368 CodeEmitterGM107::emitCond3(int pos, CondCode code)
369 {
370 int data = 0;
371
372 switch (code) {
373 case CC_FL : data = 0x00; break;
374 case CC_LTU:
375 case CC_LT : data = 0x01; break;
376 case CC_EQU:
377 case CC_EQ : data = 0x02; break;
378 case CC_LEU:
379 case CC_LE : data = 0x03; break;
380 case CC_GTU:
381 case CC_GT : data = 0x04; break;
382 case CC_NEU:
383 case CC_NE : data = 0x05; break;
384 case CC_GEU:
385 case CC_GE : data = 0x06; break;
386 case CC_TR : data = 0x07; break;
387 default:
388 assert(!"invalid cond3");
389 break;
390 }
391
392 emitField(pos, 3, data);
393 }
394
395 void
emitCond4(int pos,CondCode code)396 CodeEmitterGM107::emitCond4(int pos, CondCode code)
397 {
398 int data = 0;
399
400 switch (code) {
401 case CC_FL: data = 0x00; break;
402 case CC_LT: data = 0x01; break;
403 case CC_EQ: data = 0x02; break;
404 case CC_LE: data = 0x03; break;
405 case CC_GT: data = 0x04; break;
406 case CC_NE: data = 0x05; break;
407 case CC_GE: data = 0x06; break;
408 // case CC_NUM: data = 0x07; break;
409 // case CC_NAN: data = 0x08; break;
410 case CC_LTU: data = 0x09; break;
411 case CC_EQU: data = 0x0a; break;
412 case CC_LEU: data = 0x0b; break;
413 case CC_GTU: data = 0x0c; break;
414 case CC_NEU: data = 0x0d; break;
415 case CC_GEU: data = 0x0e; break;
416 case CC_TR: data = 0x0f; break;
417 default:
418 assert(!"invalid cond4");
419 break;
420 }
421
422 emitField(pos, 4, data);
423 }
424
425 void
emitO(int pos)426 CodeEmitterGM107::emitO(int pos)
427 {
428 emitField(pos, 1, insn->getSrc(0)->reg.file == FILE_SHADER_OUTPUT);
429 }
430
431 void
emitP(int pos)432 CodeEmitterGM107::emitP(int pos)
433 {
434 emitField(pos, 1, insn->perPatch);
435 }
436
437 void
emitSAT(int pos)438 CodeEmitterGM107::emitSAT(int pos)
439 {
440 emitField(pos, 1, insn->saturate);
441 }
442
443 void
emitCC(int pos)444 CodeEmitterGM107::emitCC(int pos)
445 {
446 emitField(pos, 1, insn->flagsDef >= 0);
447 }
448
449 void
emitX(int pos)450 CodeEmitterGM107::emitX(int pos)
451 {
452 emitField(pos, 1, insn->flagsSrc >= 0);
453 }
454
455 void
emitABS(int pos,const ValueRef & ref)456 CodeEmitterGM107::emitABS(int pos, const ValueRef &ref)
457 {
458 emitField(pos, 1, ref.mod.abs());
459 }
460
461 void
emitNEG(int pos,const ValueRef & ref)462 CodeEmitterGM107::emitNEG(int pos, const ValueRef &ref)
463 {
464 emitField(pos, 1, ref.mod.neg());
465 }
466
467 void
emitNEG2(int pos,const ValueRef & a,const ValueRef & b)468 CodeEmitterGM107::emitNEG2(int pos, const ValueRef &a, const ValueRef &b)
469 {
470 emitField(pos, 1, a.mod.neg() ^ b.mod.neg());
471 }
472
473 void
emitFMZ(int pos,int len)474 CodeEmitterGM107::emitFMZ(int pos, int len)
475 {
476 emitField(pos, len, insn->dnz << 1 | insn->ftz);
477 }
478
479 void
emitRND(int rmp,RoundMode rnd,int rip)480 CodeEmitterGM107::emitRND(int rmp, RoundMode rnd, int rip)
481 {
482 int rm = 0, ri = 0;
483 switch (rnd) {
484 case ROUND_NI: ri = 1;
485 case ROUND_N : rm = 0; break;
486 case ROUND_MI: ri = 1;
487 case ROUND_M : rm = 1; break;
488 case ROUND_PI: ri = 1;
489 case ROUND_P : rm = 2; break;
490 case ROUND_ZI: ri = 1;
491 case ROUND_Z : rm = 3; break;
492 default:
493 assert(!"invalid round mode");
494 break;
495 }
496 emitField(rip, 1, ri);
497 emitField(rmp, 2, rm);
498 }
499
500 void
emitPDIV(int pos)501 CodeEmitterGM107::emitPDIV(int pos)
502 {
503 assert(insn->postFactor >= -3 && insn->postFactor <= 3);
504 if (insn->postFactor > 0)
505 emitField(pos, 3, 7 - insn->postFactor);
506 else
507 emitField(pos, 3, 0 - insn->postFactor);
508 }
509
510 void
emitINV(int pos,const ValueRef & ref)511 CodeEmitterGM107::emitINV(int pos, const ValueRef &ref)
512 {
513 emitField(pos, 1, !!(ref.mod & Modifier(NV50_IR_MOD_NOT)));
514 }
515
516 /*******************************************************************************
517 * control flow
518 ******************************************************************************/
519
520 void
emitEXIT()521 CodeEmitterGM107::emitEXIT()
522 {
523 emitInsn (0xe3000000);
524 emitCond5(0x00, CC_TR);
525 }
526
527 void
emitBRA()528 CodeEmitterGM107::emitBRA()
529 {
530 const FlowInstruction *insn = this->insn->asFlow();
531 int gpr = -1;
532
533 if (insn->indirect) {
534 if (insn->absolute)
535 emitInsn(0xe2000000); // JMX
536 else
537 emitInsn(0xe2500000); // BRX
538 gpr = 0x08;
539 } else {
540 if (insn->absolute)
541 emitInsn(0xe2100000); // JMP
542 else
543 emitInsn(0xe2400000); // BRA
544 emitField(0x07, 1, insn->allWarp);
545 }
546
547 emitField(0x06, 1, insn->limit);
548 emitCond5(0x00, CC_TR);
549
550 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
551 int32_t pos = insn->target.bb->binPos;
552 if (writeIssueDelays && !(pos & 0x1f))
553 pos += 8;
554 if (!insn->absolute)
555 emitField(0x14, 24, pos - (codeSize + 8));
556 else
557 emitField(0x14, 32, pos);
558 } else {
559 emitCBUF (0x24, gpr, 20, 16, 0, insn->src(0));
560 emitField(0x05, 1, 1);
561 }
562 }
563
564 void
emitCAL()565 CodeEmitterGM107::emitCAL()
566 {
567 const FlowInstruction *insn = this->insn->asFlow();
568
569 if (insn->absolute) {
570 emitInsn(0xe2200000, false); // JCAL
571 } else {
572 emitInsn(0xe2600000, false); // CAL
573 }
574
575 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
576 if (!insn->absolute)
577 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
578 else {
579 if (insn->builtin) {
580 int pcAbs = targGM107->getBuiltinOffset(insn->target.builtin);
581 addReloc(RelocEntry::TYPE_BUILTIN, 0, pcAbs, 0xfff00000, 20);
582 addReloc(RelocEntry::TYPE_BUILTIN, 1, pcAbs, 0x000fffff, -12);
583 } else {
584 emitField(0x14, 32, insn->target.bb->binPos);
585 }
586 }
587 } else {
588 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
589 emitField(0x05, 1, 1);
590 }
591 }
592
593 void
emitPCNT()594 CodeEmitterGM107::emitPCNT()
595 {
596 const FlowInstruction *insn = this->insn->asFlow();
597
598 emitInsn(0xe2b00000, false);
599
600 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
601 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
602 } else {
603 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
604 emitField(0x05, 1, 1);
605 }
606 }
607
608 void
emitCONT()609 CodeEmitterGM107::emitCONT()
610 {
611 emitInsn (0xe3500000);
612 emitCond5(0x00, CC_TR);
613 }
614
615 void
emitPBK()616 CodeEmitterGM107::emitPBK()
617 {
618 const FlowInstruction *insn = this->insn->asFlow();
619
620 emitInsn(0xe2a00000, false);
621
622 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
623 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
624 } else {
625 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
626 emitField(0x05, 1, 1);
627 }
628 }
629
630 void
emitBRK()631 CodeEmitterGM107::emitBRK()
632 {
633 emitInsn (0xe3400000);
634 emitCond5(0x00, CC_TR);
635 }
636
637 void
emitPRET()638 CodeEmitterGM107::emitPRET()
639 {
640 const FlowInstruction *insn = this->insn->asFlow();
641
642 emitInsn(0xe2700000, false);
643
644 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
645 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
646 } else {
647 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
648 emitField(0x05, 1, 1);
649 }
650 }
651
652 void
emitRET()653 CodeEmitterGM107::emitRET()
654 {
655 emitInsn (0xe3200000);
656 emitCond5(0x00, CC_TR);
657 }
658
659 void
emitSSY()660 CodeEmitterGM107::emitSSY()
661 {
662 const FlowInstruction *insn = this->insn->asFlow();
663
664 emitInsn(0xe2900000, false);
665
666 if (!insn->srcExists(0) || insn->src(0).getFile() != FILE_MEMORY_CONST) {
667 emitField(0x14, 24, insn->target.bb->binPos - (codeSize + 8));
668 } else {
669 emitCBUF (0x24, -1, 20, 16, 0, insn->src(0));
670 emitField(0x05, 1, 1);
671 }
672 }
673
674 void
emitSYNC()675 CodeEmitterGM107::emitSYNC()
676 {
677 emitInsn (0xf0f80000);
678 emitCond5(0x00, CC_TR);
679 }
680
681 void
emitSAM()682 CodeEmitterGM107::emitSAM()
683 {
684 emitInsn(0xe3700000, false);
685 }
686
687 void
emitRAM()688 CodeEmitterGM107::emitRAM()
689 {
690 emitInsn(0xe3800000, false);
691 }
692
693 /*******************************************************************************
694 * predicate/cc
695 ******************************************************************************/
696
697 void
emitPSETP()698 CodeEmitterGM107::emitPSETP()
699 {
700
701 emitInsn(0x50900000);
702
703 switch (insn->op) {
704 case OP_AND: emitField(0x18, 3, 0); break;
705 case OP_OR: emitField(0x18, 3, 1); break;
706 case OP_XOR: emitField(0x18, 3, 2); break;
707 default:
708 assert(!"unexpected operation");
709 break;
710 }
711
712 // emitINV (0x2a);
713 emitPRED(0x27); // TODO: support 3-arg
714 emitINV (0x20, insn->src(1));
715 emitPRED(0x1d, insn->src(1));
716 emitINV (0x0f, insn->src(0));
717 emitPRED(0x0c, insn->src(0));
718 emitPRED(0x03, insn->def(0));
719 emitPRED(0x00);
720 }
721
722 /*******************************************************************************
723 * movement / conversion
724 ******************************************************************************/
725
726 void
emitMOV()727 CodeEmitterGM107::emitMOV()
728 {
729 if (insn->src(0).getFile() != FILE_IMMEDIATE) {
730 switch (insn->src(0).getFile()) {
731 case FILE_GPR:
732 if (insn->def(0).getFile() == FILE_PREDICATE) {
733 emitInsn(0x5b6a0000);
734 emitGPR (0x08);
735 } else {
736 emitInsn(0x5c980000);
737 }
738 emitGPR (0x14, insn->src(0));
739 break;
740 case FILE_MEMORY_CONST:
741 emitInsn(0x4c980000);
742 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
743 break;
744 case FILE_IMMEDIATE:
745 emitInsn(0x38980000);
746 emitIMMD(0x14, 19, insn->src(0));
747 break;
748 case FILE_PREDICATE:
749 emitInsn(0x50880000);
750 emitPRED(0x0c, insn->src(0));
751 emitPRED(0x1d);
752 emitPRED(0x27);
753 break;
754 default:
755 assert(!"bad src file");
756 break;
757 }
758 if (insn->def(0).getFile() != FILE_PREDICATE &&
759 insn->src(0).getFile() != FILE_PREDICATE)
760 emitField(0x27, 4, insn->lanes);
761 } else {
762 emitInsn (0x01000000);
763 emitIMMD (0x14, 32, insn->src(0));
764 emitField(0x0c, 4, insn->lanes);
765 }
766
767 if (insn->def(0).getFile() == FILE_PREDICATE) {
768 emitPRED(0x27);
769 emitPRED(0x03, insn->def(0));
770 emitPRED(0x00);
771 } else {
772 emitGPR(0x00, insn->def(0));
773 }
774 }
775
776 void
emitS2R()777 CodeEmitterGM107::emitS2R()
778 {
779 emitInsn(0xf0c80000);
780 emitSYS (0x14, insn->src(0));
781 emitGPR (0x00, insn->def(0));
782 }
783
784 void
emitCS2R()785 CodeEmitterGM107::emitCS2R()
786 {
787 emitInsn(0x50c80000);
788 emitSYS (0x14, insn->src(0));
789 emitGPR (0x00, insn->def(0));
790 }
791
792 void
emitF2F()793 CodeEmitterGM107::emitF2F()
794 {
795 RoundMode rnd = insn->rnd;
796
797 switch (insn->op) {
798 case OP_FLOOR: rnd = ROUND_MI; break;
799 case OP_CEIL : rnd = ROUND_PI; break;
800 case OP_TRUNC: rnd = ROUND_ZI; break;
801 default:
802 break;
803 }
804
805 switch (insn->src(0).getFile()) {
806 case FILE_GPR:
807 emitInsn(0x5ca80000);
808 emitGPR (0x14, insn->src(0));
809 break;
810 case FILE_MEMORY_CONST:
811 emitInsn(0x4ca80000);
812 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
813 break;
814 case FILE_IMMEDIATE:
815 emitInsn(0x38a80000);
816 emitIMMD(0x14, 19, insn->src(0));
817 break;
818 default:
819 assert(!"bad src0 file");
820 break;
821 }
822
823 emitField(0x32, 1, (insn->op == OP_SAT) || insn->saturate);
824 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
825 emitCC (0x2f);
826 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
827 emitFMZ (0x2c, 1);
828 emitField(0x29, 1, insn->subOp);
829 emitRND (0x27, rnd, 0x2a);
830 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
831 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
832 emitGPR (0x00, insn->def(0));
833 }
834
835 void
emitF2I()836 CodeEmitterGM107::emitF2I()
837 {
838 RoundMode rnd = insn->rnd;
839
840 switch (insn->op) {
841 case OP_FLOOR: rnd = ROUND_M; break;
842 case OP_CEIL : rnd = ROUND_P; break;
843 case OP_TRUNC: rnd = ROUND_Z; break;
844 default:
845 break;
846 }
847
848 switch (insn->src(0).getFile()) {
849 case FILE_GPR:
850 emitInsn(0x5cb00000);
851 emitGPR (0x14, insn->src(0));
852 break;
853 case FILE_MEMORY_CONST:
854 emitInsn(0x4cb00000);
855 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
856 break;
857 case FILE_IMMEDIATE:
858 emitInsn(0x38b00000);
859 emitIMMD(0x14, 19, insn->src(0));
860 break;
861 default:
862 assert(!"bad src0 file");
863 break;
864 }
865
866 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
867 emitCC (0x2f);
868 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
869 emitFMZ (0x2c, 1);
870 emitRND (0x27, rnd, 0x2a);
871 emitField(0x0c, 1, isSignedType(insn->dType));
872 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
873 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
874 emitGPR (0x00, insn->def(0));
875 }
876
877 void
emitI2F()878 CodeEmitterGM107::emitI2F()
879 {
880 RoundMode rnd = insn->rnd;
881
882 switch (insn->op) {
883 case OP_FLOOR: rnd = ROUND_M; break;
884 case OP_CEIL : rnd = ROUND_P; break;
885 case OP_TRUNC: rnd = ROUND_Z; break;
886 default:
887 break;
888 }
889
890 switch (insn->src(0).getFile()) {
891 case FILE_GPR:
892 emitInsn(0x5cb80000);
893 emitGPR (0x14, insn->src(0));
894 break;
895 case FILE_MEMORY_CONST:
896 emitInsn(0x4cb80000);
897 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
898 break;
899 case FILE_IMMEDIATE:
900 emitInsn(0x38b80000);
901 emitIMMD(0x14, 19, insn->src(0));
902 break;
903 default:
904 assert(!"bad src0 file");
905 break;
906 }
907
908 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
909 emitCC (0x2f);
910 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
911 emitField(0x29, 2, insn->subOp);
912 emitRND (0x27, rnd, -1);
913 emitField(0x0d, 1, isSignedType(insn->sType));
914 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
915 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
916 emitGPR (0x00, insn->def(0));
917 }
918
919 void
emitI2I()920 CodeEmitterGM107::emitI2I()
921 {
922 switch (insn->src(0).getFile()) {
923 case FILE_GPR:
924 emitInsn(0x5ce00000);
925 emitGPR (0x14, insn->src(0));
926 break;
927 case FILE_MEMORY_CONST:
928 emitInsn(0x4ce00000);
929 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
930 break;
931 case FILE_IMMEDIATE:
932 emitInsn(0x38e00000);
933 emitIMMD(0x14, 19, insn->src(0));
934 break;
935 default:
936 assert(!"bad src0 file");
937 break;
938 }
939
940 emitSAT (0x32);
941 emitField(0x31, 1, (insn->op == OP_ABS) || insn->src(0).mod.abs());
942 emitCC (0x2f);
943 emitField(0x2d, 1, (insn->op == OP_NEG) || insn->src(0).mod.neg());
944 emitField(0x29, 2, insn->subOp);
945 emitField(0x0d, 1, isSignedType(insn->sType));
946 emitField(0x0c, 1, isSignedType(insn->dType));
947 emitField(0x0a, 2, util_logbase2(typeSizeof(insn->sType)));
948 emitField(0x08, 2, util_logbase2(typeSizeof(insn->dType)));
949 emitGPR (0x00, insn->def(0));
950 }
951
952 void
gm107_selpFlip(const FixupEntry * entry,uint32_t * code,const FixupData & data)953 gm107_selpFlip(const FixupEntry *entry, uint32_t *code, const FixupData& data)
954 {
955 int loc = entry->loc;
956 bool val = false;
957 switch (entry->ipa) {
958 case 0:
959 val = data.force_persample_interp;
960 break;
961 case 1:
962 val = data.msaa;
963 break;
964 }
965 if (val)
966 code[loc + 1] |= 1 << 10;
967 else
968 code[loc + 1] &= ~(1 << 10);
969 }
970
971 void
emitSEL()972 CodeEmitterGM107::emitSEL()
973 {
974 switch (insn->src(1).getFile()) {
975 case FILE_GPR:
976 emitInsn(0x5ca00000);
977 emitGPR (0x14, insn->src(1));
978 break;
979 case FILE_MEMORY_CONST:
980 emitInsn(0x4ca00000);
981 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
982 break;
983 case FILE_IMMEDIATE:
984 emitInsn(0x38a00000);
985 emitIMMD(0x14, 19, insn->src(1));
986 break;
987 default:
988 assert(!"bad src1 file");
989 break;
990 }
991
992 emitINV (0x2a, insn->src(2));
993 emitPRED(0x27, insn->src(2));
994 emitGPR (0x08, insn->src(0));
995 emitGPR (0x00, insn->def(0));
996
997 if (insn->subOp >= 1) {
998 addInterp(insn->subOp - 1, 0, gm107_selpFlip);
999 }
1000 }
1001
1002 void
emitSHFL()1003 CodeEmitterGM107::emitSHFL()
1004 {
1005 int type = 0;
1006
1007 emitInsn (0xef100000);
1008
1009 switch (insn->src(1).getFile()) {
1010 case FILE_GPR:
1011 emitGPR(0x14, insn->src(1));
1012 break;
1013 case FILE_IMMEDIATE:
1014 emitIMMD(0x14, 5, insn->src(1));
1015 type |= 1;
1016 break;
1017 default:
1018 assert(!"invalid src1 file");
1019 break;
1020 }
1021
1022 switch (insn->src(2).getFile()) {
1023 case FILE_GPR:
1024 emitGPR(0x27, insn->src(2));
1025 break;
1026 case FILE_IMMEDIATE:
1027 emitIMMD(0x22, 13, insn->src(2));
1028 type |= 2;
1029 break;
1030 default:
1031 assert(!"invalid src2 file");
1032 break;
1033 }
1034
1035 if (!insn->defExists(1))
1036 emitPRED(0x30);
1037 else {
1038 assert(insn->def(1).getFile() == FILE_PREDICATE);
1039 emitPRED(0x30, insn->def(1));
1040 }
1041
1042 emitField(0x1e, 2, insn->subOp);
1043 emitField(0x1c, 2, type);
1044 emitGPR (0x08, insn->src(0));
1045 emitGPR (0x00, insn->def(0));
1046 }
1047
1048 /*******************************************************************************
1049 * double
1050 ******************************************************************************/
1051
1052 void
emitDADD()1053 CodeEmitterGM107::emitDADD()
1054 {
1055 switch (insn->src(1).getFile()) {
1056 case FILE_GPR:
1057 emitInsn(0x5c700000);
1058 emitGPR (0x14, insn->src(1));
1059 break;
1060 case FILE_MEMORY_CONST:
1061 emitInsn(0x4c700000);
1062 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1063 break;
1064 case FILE_IMMEDIATE:
1065 emitInsn(0x38700000);
1066 emitIMMD(0x14, 19, insn->src(1));
1067 break;
1068 default:
1069 assert(!"bad src1 file");
1070 break;
1071 }
1072 emitABS(0x31, insn->src(1));
1073 emitNEG(0x30, insn->src(0));
1074 emitCC (0x2f);
1075 emitABS(0x2e, insn->src(0));
1076 emitNEG(0x2d, insn->src(1));
1077
1078 if (insn->op == OP_SUB)
1079 code[1] ^= 0x00002000;
1080
1081 emitGPR(0x08, insn->src(0));
1082 emitGPR(0x00, insn->def(0));
1083 }
1084
1085 void
emitDMUL()1086 CodeEmitterGM107::emitDMUL()
1087 {
1088 switch (insn->src(1).getFile()) {
1089 case FILE_GPR:
1090 emitInsn(0x5c800000);
1091 emitGPR (0x14, insn->src(1));
1092 break;
1093 case FILE_MEMORY_CONST:
1094 emitInsn(0x4c800000);
1095 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1096 break;
1097 case FILE_IMMEDIATE:
1098 emitInsn(0x38800000);
1099 emitIMMD(0x14, 19, insn->src(1));
1100 break;
1101 default:
1102 assert(!"bad src1 file");
1103 break;
1104 }
1105
1106 emitNEG2(0x30, insn->src(0), insn->src(1));
1107 emitCC (0x2f);
1108 emitRND (0x27);
1109 emitGPR (0x08, insn->src(0));
1110 emitGPR (0x00, insn->def(0));
1111 }
1112
1113 void
emitDFMA()1114 CodeEmitterGM107::emitDFMA()
1115 {
1116 switch(insn->src(2).getFile()) {
1117 case FILE_GPR:
1118 switch (insn->src(1).getFile()) {
1119 case FILE_GPR:
1120 emitInsn(0x5b700000);
1121 emitGPR (0x14, insn->src(1));
1122 break;
1123 case FILE_MEMORY_CONST:
1124 emitInsn(0x4b700000);
1125 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1126 break;
1127 case FILE_IMMEDIATE:
1128 emitInsn(0x36700000);
1129 emitIMMD(0x14, 19, insn->src(1));
1130 break;
1131 default:
1132 assert(!"bad src1 file");
1133 break;
1134 }
1135 emitGPR (0x27, insn->src(2));
1136 break;
1137 case FILE_MEMORY_CONST:
1138 emitInsn(0x53700000);
1139 emitGPR (0x27, insn->src(1));
1140 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1141 break;
1142 default:
1143 assert(!"bad src2 file");
1144 break;
1145 }
1146
1147 emitRND (0x32);
1148 emitNEG (0x31, insn->src(2));
1149 emitNEG2(0x30, insn->src(0), insn->src(1));
1150 emitCC (0x2f);
1151 emitGPR (0x08, insn->src(0));
1152 emitGPR (0x00, insn->def(0));
1153 }
1154
1155 void
emitDMNMX()1156 CodeEmitterGM107::emitDMNMX()
1157 {
1158 switch (insn->src(1).getFile()) {
1159 case FILE_GPR:
1160 emitInsn(0x5c500000);
1161 emitGPR (0x14, insn->src(1));
1162 break;
1163 case FILE_MEMORY_CONST:
1164 emitInsn(0x4c500000);
1165 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1166 break;
1167 case FILE_IMMEDIATE:
1168 emitInsn(0x38500000);
1169 emitIMMD(0x14, 19, insn->src(1));
1170 break;
1171 default:
1172 assert(!"bad src1 file");
1173 break;
1174 }
1175
1176 emitABS (0x31, insn->src(1));
1177 emitNEG (0x30, insn->src(0));
1178 emitCC (0x2f);
1179 emitABS (0x2e, insn->src(0));
1180 emitNEG (0x2d, insn->src(1));
1181 emitField(0x2a, 1, insn->op == OP_MAX);
1182 emitPRED (0x27);
1183 emitGPR (0x08, insn->src(0));
1184 emitGPR (0x00, insn->def(0));
1185 }
1186
1187 void
emitDSET()1188 CodeEmitterGM107::emitDSET()
1189 {
1190 const CmpInstruction *insn = this->insn->asCmp();
1191
1192 switch (insn->src(1).getFile()) {
1193 case FILE_GPR:
1194 emitInsn(0x59000000);
1195 emitGPR (0x14, insn->src(1));
1196 break;
1197 case FILE_MEMORY_CONST:
1198 emitInsn(0x49000000);
1199 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1200 break;
1201 case FILE_IMMEDIATE:
1202 emitInsn(0x32000000);
1203 emitIMMD(0x14, 19, insn->src(1));
1204 break;
1205 default:
1206 assert(!"bad src1 file");
1207 break;
1208 }
1209
1210 if (insn->op != OP_SET) {
1211 switch (insn->op) {
1212 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1213 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1214 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1215 default:
1216 assert(!"invalid set op");
1217 break;
1218 }
1219 emitPRED(0x27, insn->src(2));
1220 } else {
1221 emitPRED(0x27);
1222 }
1223
1224 emitABS (0x36, insn->src(0));
1225 emitNEG (0x35, insn->src(1));
1226 emitField(0x34, 1, insn->dType == TYPE_F32);
1227 emitCond4(0x30, insn->setCond);
1228 emitCC (0x2f);
1229 emitABS (0x2c, insn->src(1));
1230 emitNEG (0x2b, insn->src(0));
1231 emitGPR (0x08, insn->src(0));
1232 emitGPR (0x00, insn->def(0));
1233 }
1234
1235 void
emitDSETP()1236 CodeEmitterGM107::emitDSETP()
1237 {
1238 const CmpInstruction *insn = this->insn->asCmp();
1239
1240 switch (insn->src(1).getFile()) {
1241 case FILE_GPR:
1242 emitInsn(0x5b800000);
1243 emitGPR (0x14, insn->src(1));
1244 break;
1245 case FILE_MEMORY_CONST:
1246 emitInsn(0x4b800000);
1247 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1248 break;
1249 case FILE_IMMEDIATE:
1250 emitInsn(0x36800000);
1251 emitIMMD(0x14, 19, insn->src(1));
1252 break;
1253 default:
1254 assert(!"bad src1 file");
1255 break;
1256 }
1257
1258 if (insn->op != OP_SET) {
1259 switch (insn->op) {
1260 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1261 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1262 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1263 default:
1264 assert(!"invalid set op");
1265 break;
1266 }
1267 emitPRED(0x27, insn->src(2));
1268 } else {
1269 emitPRED(0x27);
1270 }
1271
1272 emitCond4(0x30, insn->setCond);
1273 emitABS (0x2c, insn->src(1));
1274 emitNEG (0x2b, insn->src(0));
1275 emitGPR (0x08, insn->src(0));
1276 emitABS (0x07, insn->src(0));
1277 emitNEG (0x06, insn->src(1));
1278 emitPRED (0x03, insn->def(0));
1279 if (insn->defExists(1))
1280 emitPRED(0x00, insn->def(1));
1281 else
1282 emitPRED(0x00);
1283 }
1284
1285 /*******************************************************************************
1286 * float
1287 ******************************************************************************/
1288
1289 void
emitFADD()1290 CodeEmitterGM107::emitFADD()
1291 {
1292 if (!longIMMD(insn->src(1))) {
1293 switch (insn->src(1).getFile()) {
1294 case FILE_GPR:
1295 emitInsn(0x5c580000);
1296 emitGPR (0x14, insn->src(1));
1297 break;
1298 case FILE_MEMORY_CONST:
1299 emitInsn(0x4c580000);
1300 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1301 break;
1302 case FILE_IMMEDIATE:
1303 emitInsn(0x38580000);
1304 emitIMMD(0x14, 19, insn->src(1));
1305 break;
1306 default:
1307 assert(!"bad src1 file");
1308 break;
1309 }
1310 emitSAT(0x32);
1311 emitABS(0x31, insn->src(1));
1312 emitNEG(0x30, insn->src(0));
1313 emitCC (0x2f);
1314 emitABS(0x2e, insn->src(0));
1315 emitNEG(0x2d, insn->src(1));
1316 emitFMZ(0x2c, 1);
1317
1318 if (insn->op == OP_SUB)
1319 code[1] ^= 0x00002000;
1320 } else {
1321 emitInsn(0x08000000);
1322 emitABS(0x39, insn->src(1));
1323 emitNEG(0x38, insn->src(0));
1324 emitFMZ(0x37, 1);
1325 emitABS(0x36, insn->src(0));
1326 emitNEG(0x35, insn->src(1));
1327 emitCC (0x34);
1328 emitIMMD(0x14, 32, insn->src(1));
1329
1330 if (insn->op == OP_SUB)
1331 code[1] ^= 0x00080000;
1332 }
1333
1334 emitGPR(0x08, insn->src(0));
1335 emitGPR(0x00, insn->def(0));
1336 }
1337
1338 void
emitFMUL()1339 CodeEmitterGM107::emitFMUL()
1340 {
1341 if (!longIMMD(insn->src(1))) {
1342 switch (insn->src(1).getFile()) {
1343 case FILE_GPR:
1344 emitInsn(0x5c680000);
1345 emitGPR (0x14, insn->src(1));
1346 break;
1347 case FILE_MEMORY_CONST:
1348 emitInsn(0x4c680000);
1349 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1350 break;
1351 case FILE_IMMEDIATE:
1352 emitInsn(0x38680000);
1353 emitIMMD(0x14, 19, insn->src(1));
1354 break;
1355 default:
1356 assert(!"bad src1 file");
1357 break;
1358 }
1359 emitSAT (0x32);
1360 emitNEG2(0x30, insn->src(0), insn->src(1));
1361 emitCC (0x2f);
1362 emitFMZ (0x2c, 2);
1363 emitPDIV(0x29);
1364 emitRND (0x27);
1365 } else {
1366 emitInsn(0x1e000000);
1367 emitSAT (0x37);
1368 emitFMZ (0x35, 2);
1369 emitCC (0x34);
1370 emitIMMD(0x14, 32, insn->src(1));
1371 if (insn->src(0).mod.neg() ^ insn->src(1).mod.neg())
1372 code[1] ^= 0x00080000; /* flip immd sign bit */
1373 }
1374
1375 emitGPR(0x08, insn->src(0));
1376 emitGPR(0x00, insn->def(0));
1377 }
1378
1379 void
emitFFMA()1380 CodeEmitterGM107::emitFFMA()
1381 {
1382 bool isLongIMMD = false;
1383 switch(insn->src(2).getFile()) {
1384 case FILE_GPR:
1385 switch (insn->src(1).getFile()) {
1386 case FILE_GPR:
1387 emitInsn(0x59800000);
1388 emitGPR (0x14, insn->src(1));
1389 break;
1390 case FILE_MEMORY_CONST:
1391 emitInsn(0x49800000);
1392 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1393 break;
1394 case FILE_IMMEDIATE:
1395 if (longIMMD(insn->getSrc(1))) {
1396 assert(insn->getDef(0)->reg.data.id == insn->getSrc(2)->reg.data.id);
1397 isLongIMMD = true;
1398 emitInsn(0x0c000000);
1399 emitIMMD(0x14, 32, insn->src(1));
1400 } else {
1401 emitInsn(0x32800000);
1402 emitIMMD(0x14, 19, insn->src(1));
1403 }
1404 break;
1405 default:
1406 assert(!"bad src1 file");
1407 break;
1408 }
1409 if (!isLongIMMD)
1410 emitGPR (0x27, insn->src(2));
1411 break;
1412 case FILE_MEMORY_CONST:
1413 emitInsn(0x51800000);
1414 emitGPR (0x27, insn->src(1));
1415 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1416 break;
1417 default:
1418 assert(!"bad src2 file");
1419 break;
1420 }
1421
1422 if (isLongIMMD) {
1423 emitNEG (0x39, insn->src(2));
1424 emitNEG2(0x38, insn->src(0), insn->src(1));
1425 emitSAT (0x37);
1426 emitCC (0x34);
1427 } else {
1428 emitRND (0x33);
1429 emitSAT (0x32);
1430 emitNEG (0x31, insn->src(2));
1431 emitNEG2(0x30, insn->src(0), insn->src(1));
1432 emitCC (0x2f);
1433 }
1434
1435 emitFMZ(0x35, 2);
1436 emitGPR(0x08, insn->src(0));
1437 emitGPR(0x00, insn->def(0));
1438 }
1439
1440 void
emitMUFU()1441 CodeEmitterGM107::emitMUFU()
1442 {
1443 int mufu = 0;
1444
1445 switch (insn->op) {
1446 case OP_COS: mufu = 0; break;
1447 case OP_SIN: mufu = 1; break;
1448 case OP_EX2: mufu = 2; break;
1449 case OP_LG2: mufu = 3; break;
1450 case OP_RCP: mufu = 4 + 2 * insn->subOp; break;
1451 case OP_RSQ: mufu = 5 + 2 * insn->subOp; break;
1452 case OP_SQRT: mufu = 8; break;
1453 default:
1454 assert(!"invalid mufu");
1455 break;
1456 }
1457
1458 emitInsn (0x50800000);
1459 emitSAT (0x32);
1460 emitNEG (0x30, insn->src(0));
1461 emitABS (0x2e, insn->src(0));
1462 emitField(0x14, 4, mufu);
1463 emitGPR (0x08, insn->src(0));
1464 emitGPR (0x00, insn->def(0));
1465 }
1466
1467 void
emitFMNMX()1468 CodeEmitterGM107::emitFMNMX()
1469 {
1470 switch (insn->src(1).getFile()) {
1471 case FILE_GPR:
1472 emitInsn(0x5c600000);
1473 emitGPR (0x14, insn->src(1));
1474 break;
1475 case FILE_MEMORY_CONST:
1476 emitInsn(0x4c600000);
1477 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1478 break;
1479 case FILE_IMMEDIATE:
1480 emitInsn(0x38600000);
1481 emitIMMD(0x14, 19, insn->src(1));
1482 break;
1483 default:
1484 assert(!"bad src1 file");
1485 break;
1486 }
1487
1488 emitField(0x2a, 1, insn->op == OP_MAX);
1489 emitPRED (0x27);
1490
1491 emitABS(0x31, insn->src(1));
1492 emitNEG(0x30, insn->src(0));
1493 emitCC (0x2f);
1494 emitABS(0x2e, insn->src(0));
1495 emitNEG(0x2d, insn->src(1));
1496 emitFMZ(0x2c, 1);
1497 emitGPR(0x08, insn->src(0));
1498 emitGPR(0x00, insn->def(0));
1499 }
1500
1501 void
emitRRO()1502 CodeEmitterGM107::emitRRO()
1503 {
1504 switch (insn->src(0).getFile()) {
1505 case FILE_GPR:
1506 emitInsn(0x5c900000);
1507 emitGPR (0x14, insn->src(0));
1508 break;
1509 case FILE_MEMORY_CONST:
1510 emitInsn(0x4c900000);
1511 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1512 break;
1513 case FILE_IMMEDIATE:
1514 emitInsn(0x38900000);
1515 emitIMMD(0x14, 19, insn->src(0));
1516 break;
1517 default:
1518 assert(!"bad src file");
1519 break;
1520 }
1521
1522 emitABS (0x31, insn->src(0));
1523 emitNEG (0x2d, insn->src(0));
1524 emitField(0x27, 1, insn->op == OP_PREEX2);
1525 emitGPR (0x00, insn->def(0));
1526 }
1527
1528 void
emitFCMP()1529 CodeEmitterGM107::emitFCMP()
1530 {
1531 const CmpInstruction *insn = this->insn->asCmp();
1532 CondCode cc = insn->setCond;
1533
1534 if (insn->src(2).mod.neg())
1535 cc = reverseCondCode(cc);
1536
1537 switch(insn->src(2).getFile()) {
1538 case FILE_GPR:
1539 switch (insn->src(1).getFile()) {
1540 case FILE_GPR:
1541 emitInsn(0x5ba00000);
1542 emitGPR (0x14, insn->src(1));
1543 break;
1544 case FILE_MEMORY_CONST:
1545 emitInsn(0x4ba00000);
1546 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1547 break;
1548 case FILE_IMMEDIATE:
1549 emitInsn(0x36a00000);
1550 emitIMMD(0x14, 19, insn->src(1));
1551 break;
1552 default:
1553 assert(!"bad src1 file");
1554 break;
1555 }
1556 emitGPR (0x27, insn->src(2));
1557 break;
1558 case FILE_MEMORY_CONST:
1559 emitInsn(0x53a00000);
1560 emitGPR (0x27, insn->src(1));
1561 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1562 break;
1563 default:
1564 assert(!"bad src2 file");
1565 break;
1566 }
1567
1568 emitCond4(0x30, cc);
1569 emitFMZ (0x2f, 1);
1570 emitGPR (0x08, insn->src(0));
1571 emitGPR (0x00, insn->def(0));
1572 }
1573
1574 void
emitFSET()1575 CodeEmitterGM107::emitFSET()
1576 {
1577 const CmpInstruction *insn = this->insn->asCmp();
1578
1579 switch (insn->src(1).getFile()) {
1580 case FILE_GPR:
1581 emitInsn(0x58000000);
1582 emitGPR (0x14, insn->src(1));
1583 break;
1584 case FILE_MEMORY_CONST:
1585 emitInsn(0x48000000);
1586 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1587 break;
1588 case FILE_IMMEDIATE:
1589 emitInsn(0x30000000);
1590 emitIMMD(0x14, 19, insn->src(1));
1591 break;
1592 default:
1593 assert(!"bad src1 file");
1594 break;
1595 }
1596
1597 if (insn->op != OP_SET) {
1598 switch (insn->op) {
1599 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1600 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1601 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1602 default:
1603 assert(!"invalid set op");
1604 break;
1605 }
1606 emitPRED(0x27, insn->src(2));
1607 } else {
1608 emitPRED(0x27);
1609 }
1610
1611 emitFMZ (0x37, 1);
1612 emitABS (0x36, insn->src(0));
1613 emitNEG (0x35, insn->src(1));
1614 emitField(0x34, 1, insn->dType == TYPE_F32);
1615 emitCond4(0x30, insn->setCond);
1616 emitCC (0x2f);
1617 emitABS (0x2c, insn->src(1));
1618 emitNEG (0x2b, insn->src(0));
1619 emitGPR (0x08, insn->src(0));
1620 emitGPR (0x00, insn->def(0));
1621 }
1622
1623 void
emitFSETP()1624 CodeEmitterGM107::emitFSETP()
1625 {
1626 const CmpInstruction *insn = this->insn->asCmp();
1627
1628 switch (insn->src(1).getFile()) {
1629 case FILE_GPR:
1630 emitInsn(0x5bb00000);
1631 emitGPR (0x14, insn->src(1));
1632 break;
1633 case FILE_MEMORY_CONST:
1634 emitInsn(0x4bb00000);
1635 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1636 break;
1637 case FILE_IMMEDIATE:
1638 emitInsn(0x36b00000);
1639 emitIMMD(0x14, 19, insn->src(1));
1640 break;
1641 default:
1642 assert(!"bad src1 file");
1643 break;
1644 }
1645
1646 if (insn->op != OP_SET) {
1647 switch (insn->op) {
1648 case OP_SET_AND: emitField(0x2d, 2, 0); break;
1649 case OP_SET_OR : emitField(0x2d, 2, 1); break;
1650 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
1651 default:
1652 assert(!"invalid set op");
1653 break;
1654 }
1655 emitPRED(0x27, insn->src(2));
1656 } else {
1657 emitPRED(0x27);
1658 }
1659
1660 emitCond4(0x30, insn->setCond);
1661 emitFMZ (0x2f, 1);
1662 emitABS (0x2c, insn->src(1));
1663 emitNEG (0x2b, insn->src(0));
1664 emitGPR (0x08, insn->src(0));
1665 emitABS (0x07, insn->src(0));
1666 emitNEG (0x06, insn->src(1));
1667 emitPRED (0x03, insn->def(0));
1668 if (insn->defExists(1))
1669 emitPRED(0x00, insn->def(1));
1670 else
1671 emitPRED(0x00);
1672 }
1673
1674 void
emitFSWZADD()1675 CodeEmitterGM107::emitFSWZADD()
1676 {
1677 emitInsn (0x50f80000);
1678 emitCC (0x2f);
1679 emitFMZ (0x2c, 1);
1680 emitRND (0x27);
1681 emitField(0x26, 1, insn->lanes); /* abused for .ndv */
1682 emitField(0x1c, 8, insn->subOp);
1683 if (insn->predSrc != 1)
1684 emitGPR (0x14, insn->src(1));
1685 else
1686 emitGPR (0x14);
1687 emitGPR (0x08, insn->src(0));
1688 emitGPR (0x00, insn->def(0));
1689 }
1690
1691 /*******************************************************************************
1692 * integer
1693 ******************************************************************************/
1694
1695 void
emitLOP()1696 CodeEmitterGM107::emitLOP()
1697 {
1698 int lop = 0;
1699
1700 switch (insn->op) {
1701 case OP_AND: lop = 0; break;
1702 case OP_OR : lop = 1; break;
1703 case OP_XOR: lop = 2; break;
1704 default:
1705 assert(!"invalid lop");
1706 break;
1707 }
1708
1709 if (!longIMMD(insn->src(1))) {
1710 switch (insn->src(1).getFile()) {
1711 case FILE_GPR:
1712 emitInsn(0x5c400000);
1713 emitGPR (0x14, insn->src(1));
1714 break;
1715 case FILE_MEMORY_CONST:
1716 emitInsn(0x4c400000);
1717 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1718 break;
1719 case FILE_IMMEDIATE:
1720 emitInsn(0x38400000);
1721 emitIMMD(0x14, 19, insn->src(1));
1722 break;
1723 default:
1724 assert(!"bad src1 file");
1725 break;
1726 }
1727 emitPRED (0x30);
1728 emitCC (0x2f);
1729 emitX (0x2b);
1730 emitField(0x29, 2, lop);
1731 emitINV (0x28, insn->src(1));
1732 emitINV (0x27, insn->src(0));
1733 } else {
1734 emitInsn (0x04000000);
1735 emitX (0x39);
1736 emitINV (0x38, insn->src(1));
1737 emitINV (0x37, insn->src(0));
1738 emitField(0x35, 2, lop);
1739 emitCC (0x34);
1740 emitIMMD (0x14, 32, insn->src(1));
1741 }
1742
1743 emitGPR (0x08, insn->src(0));
1744 emitGPR (0x00, insn->def(0));
1745 }
1746
1747 /* special-case of emitLOP(): lop pass_b dst 0 ~src */
1748 void
emitNOT()1749 CodeEmitterGM107::emitNOT()
1750 {
1751 if (!longIMMD(insn->src(0))) {
1752 switch (insn->src(0).getFile()) {
1753 case FILE_GPR:
1754 emitInsn(0x5c400700);
1755 emitGPR (0x14, insn->src(0));
1756 break;
1757 case FILE_MEMORY_CONST:
1758 emitInsn(0x4c400700);
1759 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
1760 break;
1761 case FILE_IMMEDIATE:
1762 emitInsn(0x38400700);
1763 emitIMMD(0x14, 19, insn->src(0));
1764 break;
1765 default:
1766 assert(!"bad src1 file");
1767 break;
1768 }
1769 emitPRED (0x30);
1770 } else {
1771 emitInsn (0x05600000);
1772 emitIMMD (0x14, 32, insn->src(1));
1773 }
1774
1775 emitGPR(0x08);
1776 emitGPR(0x00, insn->def(0));
1777 }
1778
1779 void
emitIADD()1780 CodeEmitterGM107::emitIADD()
1781 {
1782 if (!longIMMD(insn->src(1))) {
1783 switch (insn->src(1).getFile()) {
1784 case FILE_GPR:
1785 emitInsn(0x5c100000);
1786 emitGPR (0x14, insn->src(1));
1787 break;
1788 case FILE_MEMORY_CONST:
1789 emitInsn(0x4c100000);
1790 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1791 break;
1792 case FILE_IMMEDIATE:
1793 emitInsn(0x38100000);
1794 emitIMMD(0x14, 19, insn->src(1));
1795 break;
1796 default:
1797 assert(!"bad src1 file");
1798 break;
1799 }
1800 emitSAT(0x32);
1801 emitNEG(0x31, insn->src(0));
1802 emitNEG(0x30, insn->src(1));
1803 emitCC (0x2f);
1804 emitX (0x2b);
1805 } else {
1806 emitInsn(0x1c000000);
1807 emitNEG (0x38, insn->src(0));
1808 emitSAT (0x36);
1809 emitX (0x35);
1810 emitCC (0x34);
1811 emitIMMD(0x14, 32, insn->src(1));
1812 }
1813
1814 if (insn->op == OP_SUB)
1815 code[1] ^= 0x00010000;
1816
1817 emitGPR(0x08, insn->src(0));
1818 emitGPR(0x00, insn->def(0));
1819 }
1820
1821 void
emitIMUL()1822 CodeEmitterGM107::emitIMUL()
1823 {
1824 if (!longIMMD(insn->src(1))) {
1825 switch (insn->src(1).getFile()) {
1826 case FILE_GPR:
1827 emitInsn(0x5c380000);
1828 emitGPR (0x14, insn->src(1));
1829 break;
1830 case FILE_MEMORY_CONST:
1831 emitInsn(0x4c380000);
1832 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1833 break;
1834 case FILE_IMMEDIATE:
1835 emitInsn(0x38380000);
1836 emitIMMD(0x14, 19, insn->src(1));
1837 break;
1838 default:
1839 assert(!"bad src1 file");
1840 break;
1841 }
1842 emitCC (0x2f);
1843 emitField(0x29, 1, isSignedType(insn->sType));
1844 emitField(0x28, 1, isSignedType(insn->dType));
1845 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1846 } else {
1847 emitInsn (0x1f000000);
1848 emitField(0x37, 1, isSignedType(insn->sType));
1849 emitField(0x36, 1, isSignedType(insn->dType));
1850 emitField(0x35, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1851 emitCC (0x34);
1852 emitIMMD (0x14, 32, insn->src(1));
1853 }
1854
1855 emitGPR(0x08, insn->src(0));
1856 emitGPR(0x00, insn->def(0));
1857 }
1858
1859 void
emitIMAD()1860 CodeEmitterGM107::emitIMAD()
1861 {
1862 /*XXX: imad32i exists, but not using it as third src overlaps dst */
1863 switch(insn->src(2).getFile()) {
1864 case FILE_GPR:
1865 switch (insn->src(1).getFile()) {
1866 case FILE_GPR:
1867 emitInsn(0x5a000000);
1868 emitGPR (0x14, insn->src(1));
1869 break;
1870 case FILE_MEMORY_CONST:
1871 emitInsn(0x4a000000);
1872 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1873 break;
1874 case FILE_IMMEDIATE:
1875 emitInsn(0x34000000);
1876 emitIMMD(0x14, 19, insn->src(1));
1877 break;
1878 default:
1879 assert(!"bad src1 file");
1880 break;
1881 }
1882 emitGPR (0x27, insn->src(2));
1883 break;
1884 case FILE_MEMORY_CONST:
1885 emitInsn(0x52000000);
1886 emitGPR (0x27, insn->src(1));
1887 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1888 break;
1889 default:
1890 assert(!"bad src2 file");
1891 break;
1892 }
1893
1894 emitField(0x36, 1, insn->subOp == NV50_IR_SUBOP_MUL_HIGH);
1895 emitField(0x35, 1, isSignedType(insn->sType));
1896 emitNEG (0x34, insn->src(2));
1897 emitNEG2 (0x33, insn->src(0), insn->src(1));
1898 emitSAT (0x32);
1899 emitX (0x31);
1900 emitField(0x30, 1, isSignedType(insn->dType));
1901 emitCC (0x2f);
1902 emitGPR (0x08, insn->src(0));
1903 emitGPR (0x00, insn->def(0));
1904 }
1905
1906 void
emitISCADD()1907 CodeEmitterGM107::emitISCADD()
1908 {
1909 assert(insn->src(1).get()->asImm());
1910
1911 switch (insn->src(2).getFile()) {
1912 case FILE_GPR:
1913 emitInsn(0x5c180000);
1914 emitGPR (0x14, insn->src(2));
1915 break;
1916 case FILE_MEMORY_CONST:
1917 emitInsn(0x4c180000);
1918 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1919 break;
1920 case FILE_IMMEDIATE:
1921 emitInsn(0x38180000);
1922 emitIMMD(0x14, 19, insn->src(2));
1923 break;
1924 default:
1925 assert(!"bad src1 file");
1926 break;
1927 }
1928 emitNEG (0x31, insn->src(0));
1929 emitNEG (0x30, insn->src(2));
1930 emitCC (0x2f);
1931 emitIMMD(0x27, 5, insn->src(1));
1932 emitGPR (0x08, insn->src(0));
1933 emitGPR (0x00, insn->def(0));
1934 }
1935
1936 void
emitXMAD()1937 CodeEmitterGM107::emitXMAD()
1938 {
1939 assert(insn->src(0).getFile() == FILE_GPR);
1940
1941 bool constbuf = false;
1942 bool psl_mrg = true;
1943 bool immediate = false;
1944 if (insn->src(2).getFile() == FILE_MEMORY_CONST) {
1945 assert(insn->src(1).getFile() == FILE_GPR);
1946 constbuf = true;
1947 psl_mrg = false;
1948 emitInsn(0x51000000);
1949 emitGPR(0x27, insn->src(1));
1950 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
1951 } else if (insn->src(1).getFile() == FILE_MEMORY_CONST) {
1952 assert(insn->src(2).getFile() == FILE_GPR);
1953 constbuf = true;
1954 emitInsn(0x4e000000);
1955 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
1956 emitGPR(0x27, insn->src(2));
1957 } else if (insn->src(1).getFile() == FILE_IMMEDIATE) {
1958 assert(insn->src(2).getFile() == FILE_GPR);
1959 assert(!(insn->subOp & NV50_IR_SUBOP_XMAD_H1(1)));
1960 immediate = true;
1961 emitInsn(0x36000000);
1962 emitIMMD(0x14, 16, insn->src(1));
1963 emitGPR(0x27, insn->src(2));
1964 } else {
1965 assert(insn->src(1).getFile() == FILE_GPR);
1966 assert(insn->src(2).getFile() == FILE_GPR);
1967 emitInsn(0x5b000000);
1968 emitGPR(0x14, insn->src(1));
1969 emitGPR(0x27, insn->src(2));
1970 }
1971
1972 if (psl_mrg)
1973 emitField(constbuf ? 0x37 : 0x24, 2, insn->subOp & 0x3);
1974
1975 unsigned cmode = (insn->subOp & NV50_IR_SUBOP_XMAD_CMODE_MASK);
1976 cmode >>= NV50_IR_SUBOP_XMAD_CMODE_SHIFT;
1977 emitField(0x32, constbuf ? 2 : 3, cmode);
1978
1979 emitX(constbuf ? 0x36 : 0x26);
1980 emitCC(0x2f);
1981
1982 emitGPR(0x0, insn->def(0));
1983 emitGPR(0x8, insn->src(0));
1984
1985 // source flags
1986 if (isSignedType(insn->sType)) {
1987 uint16_t h1s = insn->subOp & NV50_IR_SUBOP_XMAD_H1_MASK;
1988 emitField(0x30, 2, h1s >> NV50_IR_SUBOP_XMAD_H1_SHIFT);
1989 }
1990 emitField(0x35, 1, insn->subOp & NV50_IR_SUBOP_XMAD_H1(0) ? 1 : 0);
1991 if (!immediate) {
1992 bool h1 = insn->subOp & NV50_IR_SUBOP_XMAD_H1(1);
1993 emitField(constbuf ? 0x34 : 0x23, 1, h1);
1994 }
1995 }
1996
1997 void
emitIMNMX()1998 CodeEmitterGM107::emitIMNMX()
1999 {
2000 switch (insn->src(1).getFile()) {
2001 case FILE_GPR:
2002 emitInsn(0x5c200000);
2003 emitGPR (0x14, insn->src(1));
2004 break;
2005 case FILE_MEMORY_CONST:
2006 emitInsn(0x4c200000);
2007 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2008 break;
2009 case FILE_IMMEDIATE:
2010 emitInsn(0x38200000);
2011 emitIMMD(0x14, 19, insn->src(1));
2012 break;
2013 default:
2014 assert(!"bad src1 file");
2015 break;
2016 }
2017
2018 emitField(0x30, 1, isSignedType(insn->dType));
2019 emitCC (0x2f);
2020 emitField(0x2b, 2, insn->subOp);
2021 emitField(0x2a, 1, insn->op == OP_MAX);
2022 emitPRED (0x27);
2023 emitGPR (0x08, insn->src(0));
2024 emitGPR (0x00, insn->def(0));
2025 }
2026
2027 void
emitICMP()2028 CodeEmitterGM107::emitICMP()
2029 {
2030 const CmpInstruction *insn = this->insn->asCmp();
2031 CondCode cc = insn->setCond;
2032
2033 if (insn->src(2).mod.neg())
2034 cc = reverseCondCode(cc);
2035
2036 switch(insn->src(2).getFile()) {
2037 case FILE_GPR:
2038 switch (insn->src(1).getFile()) {
2039 case FILE_GPR:
2040 emitInsn(0x5b400000);
2041 emitGPR (0x14, insn->src(1));
2042 break;
2043 case FILE_MEMORY_CONST:
2044 emitInsn(0x4b400000);
2045 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2046 break;
2047 case FILE_IMMEDIATE:
2048 emitInsn(0x36400000);
2049 emitIMMD(0x14, 19, insn->src(1));
2050 break;
2051 default:
2052 assert(!"bad src1 file");
2053 break;
2054 }
2055 emitGPR (0x27, insn->src(2));
2056 break;
2057 case FILE_MEMORY_CONST:
2058 emitInsn(0x53400000);
2059 emitGPR (0x27, insn->src(1));
2060 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2061 break;
2062 default:
2063 assert(!"bad src2 file");
2064 break;
2065 }
2066
2067 emitCond3(0x31, cc);
2068 emitField(0x30, 1, isSignedType(insn->sType));
2069 emitGPR (0x08, insn->src(0));
2070 emitGPR (0x00, insn->def(0));
2071 }
2072
2073 void
emitISET()2074 CodeEmitterGM107::emitISET()
2075 {
2076 const CmpInstruction *insn = this->insn->asCmp();
2077
2078 switch (insn->src(1).getFile()) {
2079 case FILE_GPR:
2080 emitInsn(0x5b500000);
2081 emitGPR (0x14, insn->src(1));
2082 break;
2083 case FILE_MEMORY_CONST:
2084 emitInsn(0x4b500000);
2085 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2086 break;
2087 case FILE_IMMEDIATE:
2088 emitInsn(0x36500000);
2089 emitIMMD(0x14, 19, insn->src(1));
2090 break;
2091 default:
2092 assert(!"bad src1 file");
2093 break;
2094 }
2095
2096 if (insn->op != OP_SET) {
2097 switch (insn->op) {
2098 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2099 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2100 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2101 default:
2102 assert(!"invalid set op");
2103 break;
2104 }
2105 emitPRED(0x27, insn->src(2));
2106 } else {
2107 emitPRED(0x27);
2108 }
2109
2110 emitCond3(0x31, insn->setCond);
2111 emitField(0x30, 1, isSignedType(insn->sType));
2112 emitCC (0x2f);
2113 emitField(0x2c, 1, insn->dType == TYPE_F32);
2114 emitX (0x2b);
2115 emitGPR (0x08, insn->src(0));
2116 emitGPR (0x00, insn->def(0));
2117 }
2118
2119 void
emitISETP()2120 CodeEmitterGM107::emitISETP()
2121 {
2122 const CmpInstruction *insn = this->insn->asCmp();
2123
2124 switch (insn->src(1).getFile()) {
2125 case FILE_GPR:
2126 emitInsn(0x5b600000);
2127 emitGPR (0x14, insn->src(1));
2128 break;
2129 case FILE_MEMORY_CONST:
2130 emitInsn(0x4b600000);
2131 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2132 break;
2133 case FILE_IMMEDIATE:
2134 emitInsn(0x36600000);
2135 emitIMMD(0x14, 19, insn->src(1));
2136 break;
2137 default:
2138 assert(!"bad src1 file");
2139 break;
2140 }
2141
2142 if (insn->op != OP_SET) {
2143 switch (insn->op) {
2144 case OP_SET_AND: emitField(0x2d, 2, 0); break;
2145 case OP_SET_OR : emitField(0x2d, 2, 1); break;
2146 case OP_SET_XOR: emitField(0x2d, 2, 2); break;
2147 default:
2148 assert(!"invalid set op");
2149 break;
2150 }
2151 emitPRED(0x27, insn->src(2));
2152 } else {
2153 emitPRED(0x27);
2154 }
2155
2156 emitCond3(0x31, insn->setCond);
2157 emitField(0x30, 1, isSignedType(insn->sType));
2158 emitX (0x2b);
2159 emitGPR (0x08, insn->src(0));
2160 emitPRED (0x03, insn->def(0));
2161 if (insn->defExists(1))
2162 emitPRED(0x00, insn->def(1));
2163 else
2164 emitPRED(0x00);
2165 }
2166
2167 void
emitSHL()2168 CodeEmitterGM107::emitSHL()
2169 {
2170 switch (insn->src(1).getFile()) {
2171 case FILE_GPR:
2172 emitInsn(0x5c480000);
2173 emitGPR (0x14, insn->src(1));
2174 break;
2175 case FILE_MEMORY_CONST:
2176 emitInsn(0x4c480000);
2177 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2178 break;
2179 case FILE_IMMEDIATE:
2180 emitInsn(0x38480000);
2181 emitIMMD(0x14, 19, insn->src(1));
2182 break;
2183 default:
2184 assert(!"bad src1 file");
2185 break;
2186 }
2187
2188 emitCC (0x2f);
2189 emitX (0x2b);
2190 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2191 emitGPR (0x08, insn->src(0));
2192 emitGPR (0x00, insn->def(0));
2193 }
2194
2195 void
emitSHR()2196 CodeEmitterGM107::emitSHR()
2197 {
2198 switch (insn->src(1).getFile()) {
2199 case FILE_GPR:
2200 emitInsn(0x5c280000);
2201 emitGPR (0x14, insn->src(1));
2202 break;
2203 case FILE_MEMORY_CONST:
2204 emitInsn(0x4c280000);
2205 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2206 break;
2207 case FILE_IMMEDIATE:
2208 emitInsn(0x38280000);
2209 emitIMMD(0x14, 19, insn->src(1));
2210 break;
2211 default:
2212 assert(!"bad src1 file");
2213 break;
2214 }
2215
2216 emitField(0x30, 1, isSignedType(insn->dType));
2217 emitCC (0x2f);
2218 emitX (0x2c);
2219 emitField(0x27, 1, insn->subOp == NV50_IR_SUBOP_SHIFT_WRAP);
2220 emitGPR (0x08, insn->src(0));
2221 emitGPR (0x00, insn->def(0));
2222 }
2223
2224 void
emitSHF()2225 CodeEmitterGM107::emitSHF()
2226 {
2227 unsigned type;
2228
2229 switch (insn->src(1).getFile()) {
2230 case FILE_GPR:
2231 emitInsn(insn->op == OP_SHL ? 0x5bf80000 : 0x5cf80000);
2232 emitGPR(0x14, insn->src(1));
2233 break;
2234 case FILE_IMMEDIATE:
2235 emitInsn(insn->op == OP_SHL ? 0x36f80000 : 0x38f80000);
2236 emitIMMD(0x14, 19, insn->src(1));
2237 break;
2238 default:
2239 assert(!"bad src1 file");
2240 break;
2241 }
2242
2243 switch (insn->sType) {
2244 case TYPE_U64:
2245 type = 2;
2246 break;
2247 case TYPE_S64:
2248 type = 3;
2249 break;
2250 default:
2251 type = 0;
2252 break;
2253 }
2254
2255 emitField(0x32, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_WRAP));
2256 emitX (0x31);
2257 emitField(0x30, 1, !!(insn->subOp & NV50_IR_SUBOP_SHIFT_HIGH));
2258 emitCC (0x2f);
2259 emitGPR (0x27, insn->src(2));
2260 emitField(0x25, 2, type);
2261 emitGPR (0x08, insn->src(0));
2262 emitGPR (0x00, insn->def(0));
2263 }
2264
2265 void
emitPOPC()2266 CodeEmitterGM107::emitPOPC()
2267 {
2268 switch (insn->src(0).getFile()) {
2269 case FILE_GPR:
2270 emitInsn(0x5c080000);
2271 emitGPR (0x14, insn->src(0));
2272 break;
2273 case FILE_MEMORY_CONST:
2274 emitInsn(0x4c080000);
2275 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2276 break;
2277 case FILE_IMMEDIATE:
2278 emitInsn(0x38080000);
2279 emitIMMD(0x14, 19, insn->src(0));
2280 break;
2281 default:
2282 assert(!"bad src1 file");
2283 break;
2284 }
2285
2286 emitINV(0x28, insn->src(0));
2287 emitGPR(0x00, insn->def(0));
2288 }
2289
2290 void
emitBFI()2291 CodeEmitterGM107::emitBFI()
2292 {
2293 switch(insn->src(2).getFile()) {
2294 case FILE_GPR:
2295 switch (insn->src(1).getFile()) {
2296 case FILE_GPR:
2297 emitInsn(0x5bf00000);
2298 emitGPR (0x14, insn->src(1));
2299 break;
2300 case FILE_MEMORY_CONST:
2301 emitInsn(0x4bf00000);
2302 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2303 break;
2304 case FILE_IMMEDIATE:
2305 emitInsn(0x36f00000);
2306 emitIMMD(0x14, 19, insn->src(1));
2307 break;
2308 default:
2309 assert(!"bad src1 file");
2310 break;
2311 }
2312 emitGPR (0x27, insn->src(2));
2313 break;
2314 case FILE_MEMORY_CONST:
2315 emitInsn(0x53f00000);
2316 emitGPR (0x27, insn->src(1));
2317 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(2));
2318 break;
2319 default:
2320 assert(!"bad src2 file");
2321 break;
2322 }
2323
2324 emitCC (0x2f);
2325 emitGPR (0x08, insn->src(0));
2326 emitGPR (0x00, insn->def(0));
2327 }
2328
2329 void
emitBFE()2330 CodeEmitterGM107::emitBFE()
2331 {
2332 switch (insn->src(1).getFile()) {
2333 case FILE_GPR:
2334 emitInsn(0x5c000000);
2335 emitGPR (0x14, insn->src(1));
2336 break;
2337 case FILE_MEMORY_CONST:
2338 emitInsn(0x4c000000);
2339 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2340 break;
2341 case FILE_IMMEDIATE:
2342 emitInsn(0x38000000);
2343 emitIMMD(0x14, 19, insn->src(1));
2344 break;
2345 default:
2346 assert(!"bad src1 file");
2347 break;
2348 }
2349
2350 emitField(0x30, 1, isSignedType(insn->dType));
2351 emitCC (0x2f);
2352 emitField(0x28, 1, insn->subOp == NV50_IR_SUBOP_EXTBF_REV);
2353 emitGPR (0x08, insn->src(0));
2354 emitGPR (0x00, insn->def(0));
2355 }
2356
2357 void
emitFLO()2358 CodeEmitterGM107::emitFLO()
2359 {
2360 switch (insn->src(0).getFile()) {
2361 case FILE_GPR:
2362 emitInsn(0x5c300000);
2363 emitGPR (0x14, insn->src(0));
2364 break;
2365 case FILE_MEMORY_CONST:
2366 emitInsn(0x4c300000);
2367 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(0));
2368 break;
2369 case FILE_IMMEDIATE:
2370 emitInsn(0x38300000);
2371 emitIMMD(0x14, 19, insn->src(0));
2372 break;
2373 default:
2374 assert(!"bad src1 file");
2375 break;
2376 }
2377
2378 emitField(0x30, 1, isSignedType(insn->dType));
2379 emitCC (0x2f);
2380 emitField(0x29, 1, insn->subOp == NV50_IR_SUBOP_BFIND_SAMT);
2381 emitINV (0x28, insn->src(0));
2382 emitGPR (0x00, insn->def(0));
2383 }
2384
2385 void
emitPRMT()2386 CodeEmitterGM107::emitPRMT()
2387 {
2388 switch (insn->src(1).getFile()) {
2389 case FILE_GPR:
2390 emitInsn(0x5bc00000);
2391 emitGPR (0x14, insn->src(1));
2392 break;
2393 case FILE_MEMORY_CONST:
2394 emitInsn(0x4bc00000);
2395 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
2396 break;
2397 case FILE_IMMEDIATE:
2398 emitInsn(0x36c00000);
2399 emitIMMD(0x14, 19, insn->src(1));
2400 break;
2401 default:
2402 assert(!"bad src1 file");
2403 break;
2404 }
2405
2406 emitField(0x30, 3, insn->subOp);
2407 emitGPR (0x27, insn->src(2));
2408 emitGPR (0x08, insn->src(0));
2409 emitGPR (0x00, insn->def(0));
2410 }
2411
2412 /*******************************************************************************
2413 * memory
2414 ******************************************************************************/
2415
2416 void
emitLDSTs(int pos,DataType type)2417 CodeEmitterGM107::emitLDSTs(int pos, DataType type)
2418 {
2419 int data = 0;
2420
2421 switch (typeSizeof(type)) {
2422 case 1: data = isSignedType(type) ? 1 : 0; break;
2423 case 2: data = isSignedType(type) ? 3 : 2; break;
2424 case 4: data = 4; break;
2425 case 8: data = 5; break;
2426 case 16: data = 6; break;
2427 default:
2428 assert(!"bad type");
2429 break;
2430 }
2431
2432 emitField(pos, 3, data);
2433 }
2434
2435 void
emitLDSTc(int pos)2436 CodeEmitterGM107::emitLDSTc(int pos)
2437 {
2438 int mode = 0;
2439
2440 switch (insn->cache) {
2441 case CACHE_CA: mode = 0; break;
2442 case CACHE_CG: mode = 1; break;
2443 case CACHE_CS: mode = 2; break;
2444 case CACHE_CV: mode = 3; break;
2445 default:
2446 assert(!"invalid caching mode");
2447 break;
2448 }
2449
2450 emitField(pos, 2, mode);
2451 }
2452
2453 void
emitLDC()2454 CodeEmitterGM107::emitLDC()
2455 {
2456 emitInsn (0xef900000);
2457 emitLDSTs(0x30, insn->dType);
2458 emitField(0x2c, 2, insn->subOp);
2459 emitCBUF (0x24, 0x08, 0x14, 16, 0, insn->src(0));
2460 emitGPR (0x00, insn->def(0));
2461 }
2462
2463 void
emitLDL()2464 CodeEmitterGM107::emitLDL()
2465 {
2466 emitInsn (0xef400000);
2467 emitLDSTs(0x30, insn->dType);
2468 emitLDSTc(0x2c);
2469 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2470 emitGPR (0x00, insn->def(0));
2471 }
2472
2473 void
emitLDS()2474 CodeEmitterGM107::emitLDS()
2475 {
2476 emitInsn (0xef480000);
2477 emitLDSTs(0x30, insn->dType);
2478 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2479 emitGPR (0x00, insn->def(0));
2480 }
2481
2482 void
emitLD()2483 CodeEmitterGM107::emitLD()
2484 {
2485 emitInsn (0x80000000);
2486 emitPRED (0x3a);
2487 emitLDSTc(0x38);
2488 emitLDSTs(0x35, insn->dType);
2489 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2490 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2491 emitGPR (0x00, insn->def(0));
2492 }
2493
2494 void
emitSTL()2495 CodeEmitterGM107::emitSTL()
2496 {
2497 emitInsn (0xef500000);
2498 emitLDSTs(0x30, insn->dType);
2499 emitLDSTc(0x2c);
2500 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2501 emitGPR (0x00, insn->src(1));
2502 }
2503
2504 void
emitSTS()2505 CodeEmitterGM107::emitSTS()
2506 {
2507 emitInsn (0xef580000);
2508 emitLDSTs(0x30, insn->dType);
2509 emitADDR (0x08, 0x14, 24, 0, insn->src(0));
2510 emitGPR (0x00, insn->src(1));
2511 }
2512
2513 void
emitST()2514 CodeEmitterGM107::emitST()
2515 {
2516 emitInsn (0xa0000000);
2517 emitPRED (0x3a);
2518 emitLDSTc(0x38);
2519 emitLDSTs(0x35, insn->dType);
2520 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2521 emitADDR (0x08, 0x14, 32, 0, insn->src(0));
2522 emitGPR (0x00, insn->src(1));
2523 }
2524
2525 void
emitALD()2526 CodeEmitterGM107::emitALD()
2527 {
2528 emitInsn (0xefd80000);
2529 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2530 emitGPR (0x27, insn->src(0).getIndirect(1));
2531 emitO (0x20);
2532 emitP (0x1f);
2533 emitADDR (0x08, 20, 10, 0, insn->src(0));
2534 emitGPR (0x00, insn->def(0));
2535 }
2536
2537 void
emitAST()2538 CodeEmitterGM107::emitAST()
2539 {
2540 emitInsn (0xeff00000);
2541 emitField(0x2f, 2, (typeSizeof(insn->dType) / 4) - 1);
2542 emitGPR (0x27, insn->src(0).getIndirect(1));
2543 emitP (0x1f);
2544 emitADDR (0x08, 20, 10, 0, insn->src(0));
2545 emitGPR (0x00, insn->src(1));
2546 }
2547
2548 void
emitISBERD()2549 CodeEmitterGM107::emitISBERD()
2550 {
2551 emitInsn(0xefd00000);
2552 emitGPR (0x08, insn->src(0));
2553 emitGPR (0x00, insn->def(0));
2554 }
2555
2556 void
emitAL2P()2557 CodeEmitterGM107::emitAL2P()
2558 {
2559 emitInsn (0xefa00000);
2560 emitField(0x2f, 2, (insn->getDef(0)->reg.size / 4) - 1);
2561 emitPRED (0x2c);
2562 emitO (0x20);
2563 emitField(0x14, 11, insn->src(0).get()->reg.data.offset);
2564 emitGPR (0x08, insn->src(0).getIndirect(0));
2565 emitGPR (0x00, insn->def(0));
2566 }
2567
2568 void
gm107_interpApply(const FixupEntry * entry,uint32_t * code,const FixupData & data)2569 gm107_interpApply(const FixupEntry *entry, uint32_t *code, const FixupData& data)
2570 {
2571 int ipa = entry->ipa;
2572 int reg = entry->reg;
2573 int loc = entry->loc;
2574
2575 if (data.flatshade &&
2576 (ipa & NV50_IR_INTERP_MODE_MASK) == NV50_IR_INTERP_SC) {
2577 ipa = NV50_IR_INTERP_FLAT;
2578 reg = 0xff;
2579 } else if (data.force_persample_interp &&
2580 (ipa & NV50_IR_INTERP_SAMPLE_MASK) == NV50_IR_INTERP_DEFAULT &&
2581 (ipa & NV50_IR_INTERP_MODE_MASK) != NV50_IR_INTERP_FLAT) {
2582 ipa |= NV50_IR_INTERP_CENTROID;
2583 }
2584 code[loc + 1] &= ~(0xf << 0x14);
2585 code[loc + 1] |= (ipa & 0x3) << 0x16;
2586 code[loc + 1] |= (ipa & 0xc) << (0x14 - 2);
2587 code[loc + 0] &= ~(0xff << 0x14);
2588 code[loc + 0] |= reg << 0x14;
2589 }
2590
2591 void
emitIPA()2592 CodeEmitterGM107::emitIPA()
2593 {
2594 int ipam = 0, ipas = 0;
2595
2596 switch (insn->getInterpMode()) {
2597 case NV50_IR_INTERP_LINEAR : ipam = 0; break;
2598 case NV50_IR_INTERP_PERSPECTIVE: ipam = 1; break;
2599 case NV50_IR_INTERP_FLAT : ipam = 2; break;
2600 case NV50_IR_INTERP_SC : ipam = 3; break;
2601 default:
2602 assert(!"invalid ipa mode");
2603 break;
2604 }
2605
2606 switch (insn->getSampleMode()) {
2607 case NV50_IR_INTERP_DEFAULT : ipas = 0; break;
2608 case NV50_IR_INTERP_CENTROID: ipas = 1; break;
2609 case NV50_IR_INTERP_OFFSET : ipas = 2; break;
2610 default:
2611 assert(!"invalid ipa sample mode");
2612 break;
2613 }
2614
2615 emitInsn (0xe0000000);
2616 emitField(0x36, 2, ipam);
2617 emitField(0x34, 2, ipas);
2618 emitSAT (0x33);
2619 emitField(0x2f, 3, 7);
2620 emitADDR (0x08, 0x1c, 10, 0, insn->src(0));
2621 if ((code[0] & 0x0000ff00) != 0x0000ff00)
2622 code[1] |= 0x00000040; /* .idx */
2623 emitGPR(0x00, insn->def(0));
2624
2625 if (insn->op == OP_PINTERP) {
2626 emitGPR(0x14, insn->src(1));
2627 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2628 emitGPR(0x27, insn->src(2));
2629 addInterp(insn->ipa, insn->getSrc(1)->reg.data.id, gm107_interpApply);
2630 } else {
2631 if (insn->getSampleMode() == NV50_IR_INTERP_OFFSET)
2632 emitGPR(0x27, insn->src(1));
2633 emitGPR(0x14);
2634 addInterp(insn->ipa, 0xff, gm107_interpApply);
2635 }
2636
2637 if (insn->getSampleMode() != NV50_IR_INTERP_OFFSET)
2638 emitGPR(0x27);
2639 }
2640
2641 void
emitATOM()2642 CodeEmitterGM107::emitATOM()
2643 {
2644 unsigned dType, subOp;
2645
2646 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2647 switch (insn->dType) {
2648 case TYPE_U32: dType = 0; break;
2649 case TYPE_U64: dType = 1; break;
2650 default: assert(!"unexpected dType"); dType = 0; break;
2651 }
2652 subOp = 15;
2653
2654 emitInsn (0xee000000);
2655 } else {
2656 switch (insn->dType) {
2657 case TYPE_U32: dType = 0; break;
2658 case TYPE_S32: dType = 1; break;
2659 case TYPE_U64: dType = 2; break;
2660 case TYPE_F32: dType = 3; break;
2661 case TYPE_B128: dType = 4; break;
2662 case TYPE_S64: dType = 5; break;
2663 default: assert(!"unexpected dType"); dType = 0; break;
2664 }
2665 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2666 subOp = 8;
2667 else
2668 subOp = insn->subOp;
2669
2670 emitInsn (0xed000000);
2671 }
2672
2673 emitField(0x34, 4, subOp);
2674 emitField(0x31, 3, dType);
2675 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2676 emitGPR (0x14, insn->src(1));
2677 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2678 emitGPR (0x00, insn->def(0));
2679 }
2680
2681 void
emitATOMS()2682 CodeEmitterGM107::emitATOMS()
2683 {
2684 unsigned dType, subOp;
2685
2686 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
2687 switch (insn->dType) {
2688 case TYPE_U32: dType = 0; break;
2689 case TYPE_U64: dType = 1; break;
2690 default: assert(!"unexpected dType"); dType = 0; break;
2691 }
2692 subOp = 4;
2693
2694 emitInsn (0xee000000);
2695 emitField(0x34, 1, dType);
2696 } else {
2697 switch (insn->dType) {
2698 case TYPE_U32: dType = 0; break;
2699 case TYPE_S32: dType = 1; break;
2700 case TYPE_U64: dType = 2; break;
2701 case TYPE_S64: dType = 3; break;
2702 default: assert(!"unexpected dType"); dType = 0; break;
2703 }
2704
2705 if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH)
2706 subOp = 8;
2707 else
2708 subOp = insn->subOp;
2709
2710 emitInsn (0xec000000);
2711 emitField(0x1c, 3, dType);
2712 }
2713
2714 emitField(0x34, 4, subOp);
2715 emitGPR (0x14, insn->src(1));
2716 emitADDR (0x08, 0x1e, 22, 2, insn->src(0));
2717 emitGPR (0x00, insn->def(0));
2718 }
2719
2720 void
emitRED()2721 CodeEmitterGM107::emitRED()
2722 {
2723 unsigned dType;
2724
2725 switch (insn->dType) {
2726 case TYPE_U32: dType = 0; break;
2727 case TYPE_S32: dType = 1; break;
2728 case TYPE_U64: dType = 2; break;
2729 case TYPE_F32: dType = 3; break;
2730 case TYPE_B128: dType = 4; break;
2731 case TYPE_S64: dType = 5; break;
2732 default: assert(!"unexpected dType"); dType = 0; break;
2733 }
2734
2735 emitInsn (0xebf80000);
2736 emitField(0x30, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2737 emitField(0x17, 3, insn->subOp);
2738 emitField(0x14, 3, dType);
2739 emitADDR (0x08, 0x1c, 20, 0, insn->src(0));
2740 emitGPR (0x00, insn->src(1));
2741 }
2742
2743 void
emitCCTL()2744 CodeEmitterGM107::emitCCTL()
2745 {
2746 unsigned width;
2747 if (insn->src(0).getFile() == FILE_MEMORY_GLOBAL) {
2748 emitInsn(0xef600000);
2749 width = 30;
2750 } else {
2751 emitInsn(0xef800000);
2752 width = 22;
2753 }
2754 emitField(0x34, 1, insn->src(0).getIndirect(0)->getSize() == 8);
2755 emitADDR (0x08, 0x16, width, 2, insn->src(0));
2756 emitField(0x00, 4, insn->subOp);
2757 }
2758
2759 /*******************************************************************************
2760 * surface
2761 ******************************************************************************/
2762
2763 void
emitPIXLD()2764 CodeEmitterGM107::emitPIXLD()
2765 {
2766 emitInsn (0xefe80000);
2767 emitPRED (0x2d);
2768 emitField(0x1f, 3, insn->subOp);
2769 emitGPR (0x08, insn->src(0));
2770 emitGPR (0x00, insn->def(0));
2771 }
2772
2773 /*******************************************************************************
2774 * texture
2775 ******************************************************************************/
2776
2777 void
emitTEXs(int pos)2778 CodeEmitterGM107::emitTEXs(int pos)
2779 {
2780 int src1 = insn->predSrc == 1 ? 2 : 1;
2781 if (insn->srcExists(src1))
2782 emitGPR(pos, insn->src(src1));
2783 else
2784 emitGPR(pos);
2785 }
2786
// Translate a 4-bit component write mask into the 3-bit mask code used by the
// scalar texture encodings (see emitTEXS). Masks 0x0, 0x5 and 0x6, and any
// value above 0xf, have no encoding: they trip the assert and yield 0.
static uint8_t
getTEXSMask(uint8_t mask)
{
   // Indexed by component mask; -1 marks masks without an encoding.
   static const int8_t encoding[16] = {
      -1,  0x0, 0x1, 0x4, 0x2, -1,  -1,  0x0,
      0x3, 0x5, 0x6, 0x1, 0x7, 0x2, 0x3, 0x4,
   };

   if (mask < 16 && encoding[mask] >= 0)
      return encoding[mask];

   assert(!"invalid mask");
   return 0;
}
2809
2810 static uint8_t
getTEXSTarget(const TexInstruction * tex)2811 getTEXSTarget(const TexInstruction *tex)
2812 {
2813 assert(tex->op == OP_TEX || tex->op == OP_TXL);
2814
2815 switch (tex->tex.target.getEnum()) {
2816 case TEX_TARGET_1D:
2817 assert(tex->tex.levelZero);
2818 return 0x0;
2819 case TEX_TARGET_2D:
2820 case TEX_TARGET_RECT:
2821 if (tex->tex.levelZero)
2822 return 0x2;
2823 if (tex->op == OP_TXL)
2824 return 0x3;
2825 return 0x1;
2826 case TEX_TARGET_2D_SHADOW:
2827 case TEX_TARGET_RECT_SHADOW:
2828 if (tex->tex.levelZero)
2829 return 0x6;
2830 if (tex->op == OP_TXL)
2831 return 0x5;
2832 return 0x4;
2833 case TEX_TARGET_2D_ARRAY:
2834 if (tex->tex.levelZero)
2835 return 0x8;
2836 return 0x7;
2837 case TEX_TARGET_2D_ARRAY_SHADOW:
2838 assert(tex->tex.levelZero);
2839 return 0x9;
2840 case TEX_TARGET_3D:
2841 if (tex->tex.levelZero)
2842 return 0xb;
2843 assert(tex->op != OP_TXL);
2844 return 0xa;
2845 case TEX_TARGET_CUBE:
2846 assert(!tex->tex.levelZero);
2847 if (tex->op == OP_TXL)
2848 return 0xd;
2849 return 0xc;
2850 default:
2851 assert(false);
2852 return 0x0;
2853 }
2854 }
2855
2856 static uint8_t
getTLDSTarget(const TexInstruction * tex)2857 getTLDSTarget(const TexInstruction *tex)
2858 {
2859 switch (tex->tex.target.getEnum()) {
2860 case TEX_TARGET_1D:
2861 if (tex->tex.levelZero)
2862 return 0x0;
2863 return 0x1;
2864 case TEX_TARGET_2D:
2865 case TEX_TARGET_RECT:
2866 if (tex->tex.levelZero)
2867 return tex->tex.useOffsets ? 0x4 : 0x2;
2868 return tex->tex.useOffsets ? 0xc : 0x5;
2869 case TEX_TARGET_2D_MS:
2870 assert(tex->tex.levelZero);
2871 return 0x6;
2872 case TEX_TARGET_3D:
2873 assert(tex->tex.levelZero);
2874 return 0x7;
2875 case TEX_TARGET_2D_ARRAY:
2876 assert(tex->tex.levelZero);
2877 return 0x8;
2878
2879 default:
2880 assert(false);
2881 return 0x0;
2882 }
2883 }
2884
2885 void
emitTEX()2886 CodeEmitterGM107::emitTEX()
2887 {
2888 const TexInstruction *insn = this->insn->asTex();
2889 int lodm = 0;
2890
2891 if (!insn->tex.levelZero) {
2892 switch (insn->op) {
2893 case OP_TEX: lodm = 0; break;
2894 case OP_TXB: lodm = 2; break;
2895 case OP_TXL: lodm = 3; break;
2896 default:
2897 assert(!"invalid tex op");
2898 break;
2899 }
2900 } else {
2901 lodm = 1;
2902 }
2903
2904 if (insn->tex.rIndirectSrc >= 0) {
2905 emitInsn (0xdeb80000);
2906 emitField(0x25, 2, lodm);
2907 emitField(0x24, 1, insn->tex.useOffsets == 1);
2908 } else {
2909 emitInsn (0xc0380000);
2910 emitField(0x37, 2, lodm);
2911 emitField(0x36, 1, insn->tex.useOffsets == 1);
2912 emitField(0x24, 13, insn->tex.r);
2913 }
2914
2915 emitField(0x32, 1, insn->tex.target.isShadow());
2916 emitField(0x31, 1, insn->tex.liveOnly);
2917 emitField(0x23, 1, insn->tex.derivAll);
2918 emitField(0x1f, 4, insn->tex.mask);
2919 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2920 insn->tex.target.getDim() - 1);
2921 emitField(0x1c, 1, insn->tex.target.isArray());
2922 emitTEXs (0x14);
2923 emitGPR (0x08, insn->src(0));
2924 emitGPR (0x00, insn->def(0));
2925 }
2926
2927 void
emitTEXS()2928 CodeEmitterGM107::emitTEXS()
2929 {
2930 const TexInstruction *insn = this->insn->asTex();
2931 assert(!insn->tex.derivAll);
2932
2933 switch (insn->op) {
2934 case OP_TEX:
2935 case OP_TXL:
2936 emitInsn (0xd8000000);
2937 emitField(0x35, 4, getTEXSTarget(insn));
2938 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2939 break;
2940 case OP_TXF:
2941 emitInsn (0xda000000);
2942 emitField(0x35, 4, getTLDSTarget(insn));
2943 emitField(0x32, 3, getTEXSMask(insn->tex.mask));
2944 break;
2945 case OP_TXG:
2946 assert(insn->tex.useOffsets != 4);
2947 emitInsn (0xdf000000);
2948 emitField(0x34, 2, insn->tex.gatherComp);
2949 emitField(0x33, 1, insn->tex.useOffsets == 1);
2950 emitField(0x32, 1, insn->tex.target.isShadow());
2951 break;
2952 default:
2953 unreachable("unknown op in emitTEXS()");
2954 break;
2955 }
2956
2957 emitField(0x31, 1, insn->tex.liveOnly);
2958 emitField(0x24, 13, insn->tex.r);
2959 if (insn->defExists(1))
2960 emitGPR(0x1c, insn->def(1));
2961 else
2962 emitGPR(0x1c);
2963 if (insn->srcExists(1))
2964 emitGPR(0x14, insn->getSrc(1));
2965 else
2966 emitGPR(0x14);
2967 emitGPR (0x08, insn->src(0));
2968 emitGPR (0x00, insn->def(0));
2969 }
2970
2971 void
emitTLD()2972 CodeEmitterGM107::emitTLD()
2973 {
2974 const TexInstruction *insn = this->insn->asTex();
2975
2976 if (insn->tex.rIndirectSrc >= 0) {
2977 emitInsn (0xdd380000);
2978 } else {
2979 emitInsn (0xdc380000);
2980 emitField(0x24, 13, insn->tex.r);
2981 }
2982
2983 emitField(0x37, 1, insn->tex.levelZero == 0);
2984 emitField(0x32, 1, insn->tex.target.isMS());
2985 emitField(0x31, 1, insn->tex.liveOnly);
2986 emitField(0x23, 1, insn->tex.useOffsets == 1);
2987 emitField(0x1f, 4, insn->tex.mask);
2988 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
2989 insn->tex.target.getDim() - 1);
2990 emitField(0x1c, 1, insn->tex.target.isArray());
2991 emitTEXs (0x14);
2992 emitGPR (0x08, insn->src(0));
2993 emitGPR (0x00, insn->def(0));
2994 }
2995
2996 void
emitTLD4()2997 CodeEmitterGM107::emitTLD4()
2998 {
2999 const TexInstruction *insn = this->insn->asTex();
3000
3001 if (insn->tex.rIndirectSrc >= 0) {
3002 emitInsn (0xdef80000);
3003 emitField(0x26, 2, insn->tex.gatherComp);
3004 emitField(0x25, 2, insn->tex.useOffsets == 4);
3005 emitField(0x24, 2, insn->tex.useOffsets == 1);
3006 } else {
3007 emitInsn (0xc8380000);
3008 emitField(0x38, 2, insn->tex.gatherComp);
3009 emitField(0x37, 2, insn->tex.useOffsets == 4);
3010 emitField(0x36, 2, insn->tex.useOffsets == 1);
3011 emitField(0x24, 13, insn->tex.r);
3012 }
3013
3014 emitField(0x32, 1, insn->tex.target.isShadow());
3015 emitField(0x31, 1, insn->tex.liveOnly);
3016 emitField(0x23, 1, insn->tex.derivAll);
3017 emitField(0x1f, 4, insn->tex.mask);
3018 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3019 insn->tex.target.getDim() - 1);
3020 emitField(0x1c, 1, insn->tex.target.isArray());
3021 emitTEXs (0x14);
3022 emitGPR (0x08, insn->src(0));
3023 emitGPR (0x00, insn->def(0));
3024 }
3025
3026 void
emitTXD()3027 CodeEmitterGM107::emitTXD()
3028 {
3029 const TexInstruction *insn = this->insn->asTex();
3030
3031 if (insn->tex.rIndirectSrc >= 0) {
3032 emitInsn (0xde780000);
3033 } else {
3034 emitInsn (0xde380000);
3035 emitField(0x24, 13, insn->tex.r);
3036 }
3037
3038 emitField(0x31, 1, insn->tex.liveOnly);
3039 emitField(0x23, 1, insn->tex.useOffsets == 1);
3040 emitField(0x1f, 4, insn->tex.mask);
3041 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3042 insn->tex.target.getDim() - 1);
3043 emitField(0x1c, 1, insn->tex.target.isArray());
3044 emitTEXs (0x14);
3045 emitGPR (0x08, insn->src(0));
3046 emitGPR (0x00, insn->def(0));
3047 }
3048
3049 void
emitTMML()3050 CodeEmitterGM107::emitTMML()
3051 {
3052 const TexInstruction *insn = this->insn->asTex();
3053
3054 if (insn->tex.rIndirectSrc >= 0) {
3055 emitInsn (0xdf600000);
3056 } else {
3057 emitInsn (0xdf580000);
3058 emitField(0x24, 13, insn->tex.r);
3059 }
3060
3061 emitField(0x31, 1, insn->tex.liveOnly);
3062 emitField(0x23, 1, insn->tex.derivAll);
3063 emitField(0x1f, 4, insn->tex.mask);
3064 emitField(0x1d, 2, insn->tex.target.isCube() ? 3 :
3065 insn->tex.target.getDim() - 1);
3066 emitField(0x1c, 1, insn->tex.target.isArray());
3067 emitTEXs (0x14);
3068 emitGPR (0x08, insn->src(0));
3069 emitGPR (0x00, insn->def(0));
3070 }
3071
3072 void
emitTXQ()3073 CodeEmitterGM107::emitTXQ()
3074 {
3075 const TexInstruction *insn = this->insn->asTex();
3076 int type = 0;
3077
3078 switch (insn->tex.query) {
3079 case TXQ_DIMS : type = 0x01; break;
3080 case TXQ_TYPE : type = 0x02; break;
3081 case TXQ_SAMPLE_POSITION: type = 0x05; break;
3082 case TXQ_FILTER : type = 0x10; break;
3083 case TXQ_LOD : type = 0x12; break;
3084 case TXQ_WRAP : type = 0x14; break;
3085 case TXQ_BORDER_COLOUR : type = 0x16; break;
3086 default:
3087 assert(!"invalid txq query");
3088 break;
3089 }
3090
3091 if (insn->tex.rIndirectSrc >= 0) {
3092 emitInsn (0xdf500000);
3093 } else {
3094 emitInsn (0xdf480000);
3095 emitField(0x24, 13, insn->tex.r);
3096 }
3097
3098 emitField(0x31, 1, insn->tex.liveOnly);
3099 emitField(0x1f, 4, insn->tex.mask);
3100 emitField(0x16, 6, type);
3101 emitGPR (0x08, insn->src(0));
3102 emitGPR (0x00, insn->def(0));
3103 }
3104
3105 void
emitDEPBAR()3106 CodeEmitterGM107::emitDEPBAR()
3107 {
3108 emitInsn (0xf0f00000);
3109 emitField(0x1d, 1, 1); /* le */
3110 emitField(0x1a, 3, 5);
3111 emitField(0x14, 6, insn->subOp);
3112 emitField(0x00, 6, insn->subOp);
3113 }
3114
3115 /*******************************************************************************
3116 * misc
3117 ******************************************************************************/
3118
3119 void
emitNOP()3120 CodeEmitterGM107::emitNOP()
3121 {
3122 emitInsn(0x50b00000);
3123 }
3124
3125 void
emitKIL()3126 CodeEmitterGM107::emitKIL()
3127 {
3128 emitInsn (0xe3300000);
3129 emitCond5(0x00, CC_TR);
3130 }
3131
3132 void
emitOUT()3133 CodeEmitterGM107::emitOUT()
3134 {
3135 const int cut = insn->op == OP_RESTART || insn->subOp;
3136 const int emit = insn->op == OP_EMIT;
3137
3138 switch (insn->src(1).getFile()) {
3139 case FILE_GPR:
3140 emitInsn(0xfbe00000);
3141 emitGPR (0x14, insn->src(1));
3142 break;
3143 case FILE_IMMEDIATE:
3144 emitInsn(0xf6e00000);
3145 emitIMMD(0x14, 19, insn->src(1));
3146 break;
3147 case FILE_MEMORY_CONST:
3148 emitInsn(0xebe00000);
3149 emitCBUF(0x22, -1, 0x14, 16, 2, insn->src(1));
3150 break;
3151 default:
3152 assert(!"bad src1 file");
3153 break;
3154 }
3155
3156 emitField(0x27, 2, (cut << 1) | emit);
3157 emitGPR (0x08, insn->src(0));
3158 emitGPR (0x00, insn->def(0));
3159 }
3160
3161 void
emitBAR()3162 CodeEmitterGM107::emitBAR()
3163 {
3164 uint8_t subop;
3165
3166 emitInsn (0xf0a80000);
3167
3168 switch (insn->subOp) {
3169 case NV50_IR_SUBOP_BAR_RED_POPC: subop = 0x02; break;
3170 case NV50_IR_SUBOP_BAR_RED_AND: subop = 0x0a; break;
3171 case NV50_IR_SUBOP_BAR_RED_OR: subop = 0x12; break;
3172 case NV50_IR_SUBOP_BAR_ARRIVE: subop = 0x81; break;
3173 default:
3174 subop = 0x80;
3175 assert(insn->subOp == NV50_IR_SUBOP_BAR_SYNC);
3176 break;
3177 }
3178
3179 emitField(0x20, 8, subop);
3180
3181 // barrier id
3182 if (insn->src(0).getFile() == FILE_GPR) {
3183 emitGPR(0x08, insn->src(0));
3184 } else {
3185 ImmediateValue *imm = insn->getSrc(0)->asImm();
3186 assert(imm);
3187 emitField(0x08, 8, imm->reg.data.u32);
3188 emitField(0x2b, 1, 1);
3189 }
3190
3191 // thread count
3192 if (insn->src(1).getFile() == FILE_GPR) {
3193 emitGPR(0x14, insn->src(1));
3194 } else {
3195 ImmediateValue *imm = insn->getSrc(0)->asImm();
3196 assert(imm);
3197 emitField(0x14, 12, imm->reg.data.u32);
3198 emitField(0x2c, 1, 1);
3199 }
3200
3201 if (insn->srcExists(2) && (insn->predSrc != 2)) {
3202 emitPRED (0x27, insn->src(2));
3203 emitField(0x2a, 1, insn->src(2).mod == Modifier(NV50_IR_MOD_NOT));
3204 } else {
3205 emitField(0x27, 3, 7);
3206 }
3207 }
3208
3209 void
emitMEMBAR()3210 CodeEmitterGM107::emitMEMBAR()
3211 {
3212 emitInsn (0xef980000);
3213 emitField(0x08, 2, insn->subOp >> 2);
3214 }
3215
3216 void
emitVOTE()3217 CodeEmitterGM107::emitVOTE()
3218 {
3219 const ImmediateValue *imm;
3220 uint32_t u32;
3221
3222 int r = -1, p = -1;
3223 for (int i = 0; insn->defExists(i); i++) {
3224 if (insn->def(i).getFile() == FILE_GPR)
3225 r = i;
3226 else if (insn->def(i).getFile() == FILE_PREDICATE)
3227 p = i;
3228 }
3229
3230 emitInsn (0x50d80000);
3231 emitField(0x30, 2, insn->subOp);
3232 if (r >= 0)
3233 emitGPR (0x00, insn->def(r));
3234 else
3235 emitGPR (0x00);
3236 if (p >= 0)
3237 emitPRED (0x2d, insn->def(p));
3238 else
3239 emitPRED (0x2d);
3240
3241 switch (insn->src(0).getFile()) {
3242 case FILE_PREDICATE:
3243 emitField(0x2a, 1, insn->src(0).mod == Modifier(NV50_IR_MOD_NOT));
3244 emitPRED (0x27, insn->src(0));
3245 break;
3246 case FILE_IMMEDIATE:
3247 imm = insn->getSrc(0)->asImm();
3248 assert(imm);
3249 u32 = imm->reg.data.u32;
3250 assert(u32 == 0 || u32 == 1);
3251 emitPRED(0x27);
3252 emitField(0x2a, 1, u32 == 0);
3253 break;
3254 default:
3255 assert(!"Unhandled src");
3256 break;
3257 }
3258 }
3259
3260 void
emitSUTarget()3261 CodeEmitterGM107::emitSUTarget()
3262 {
3263 const TexInstruction *insn = this->insn->asTex();
3264 int target = 0;
3265
3266 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3267
3268 if (insn->tex.target == TEX_TARGET_BUFFER) {
3269 target = 2;
3270 } else if (insn->tex.target == TEX_TARGET_1D_ARRAY) {
3271 target = 4;
3272 } else if (insn->tex.target == TEX_TARGET_2D ||
3273 insn->tex.target == TEX_TARGET_RECT) {
3274 target = 6;
3275 } else if (insn->tex.target == TEX_TARGET_2D_ARRAY ||
3276 insn->tex.target == TEX_TARGET_CUBE ||
3277 insn->tex.target == TEX_TARGET_CUBE_ARRAY) {
3278 target = 8;
3279 } else if (insn->tex.target == TEX_TARGET_3D) {
3280 target = 10;
3281 } else {
3282 assert(insn->tex.target == TEX_TARGET_1D);
3283 }
3284 emitField(0x20, 4, target);
3285 }
3286
3287 void
emitSUHandle(const int s)3288 CodeEmitterGM107::emitSUHandle(const int s)
3289 {
3290 const TexInstruction *insn = this->insn->asTex();
3291
3292 assert(insn->op >= OP_SULDB && insn->op <= OP_SUREDP);
3293
3294 if (insn->src(s).getFile() == FILE_GPR) {
3295 emitGPR(0x27, insn->src(s));
3296 } else {
3297 ImmediateValue *imm = insn->getSrc(s)->asImm();
3298 assert(imm);
3299 emitField(0x33, 1, 1);
3300 emitField(0x24, 13, imm->reg.data.u32);
3301 }
3302 }
3303
3304 void
emitSUSTx()3305 CodeEmitterGM107::emitSUSTx()
3306 {
3307 const TexInstruction *insn = this->insn->asTex();
3308
3309 emitInsn(0xeb200000);
3310 if (insn->op == OP_SUSTB)
3311 emitField(0x34, 1, 1);
3312 emitSUTarget();
3313
3314 emitLDSTc(0x18);
3315 emitField(0x14, 4, 0xf); // rgba
3316 emitGPR (0x08, insn->src(0));
3317 emitGPR (0x00, insn->src(1));
3318
3319 emitSUHandle(2);
3320 }
3321
3322 void
emitSULDx()3323 CodeEmitterGM107::emitSULDx()
3324 {
3325 const TexInstruction *insn = this->insn->asTex();
3326
3327 emitInsn(0xeb000000);
3328
3329 if (insn->op == OP_SULDB) {
3330 int type = 0;
3331 emitField(0x34, 1, 1);
3332 switch (insn->dType) {
3333 case TYPE_S8: type = 1; break;
3334 case TYPE_U16: type = 2; break;
3335 case TYPE_S16: type = 3; break;
3336 case TYPE_U32: type = 4; break;
3337 case TYPE_U64: type = 5; break;
3338 case TYPE_B128: type = 6; break;
3339 default:
3340 assert(insn->dType == TYPE_U8);
3341 break;
3342 }
3343 emitField(0x14, 3, type);
3344 } else {
3345 emitField(0x14, 4, 0xf); // rgba
3346 }
3347
3348 emitSUTarget();
3349 emitLDSTc(0x18);
3350 emitGPR (0x00, insn->def(0));
3351 emitGPR (0x08, insn->src(0));
3352
3353 emitSUHandle(1);
3354 }
3355
3356 void
emitSUREDx()3357 CodeEmitterGM107::emitSUREDx()
3358 {
3359 const TexInstruction *insn = this->insn->asTex();
3360 uint8_t type = 0, subOp;
3361
3362 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS)
3363 emitInsn(0xeac00000);
3364 else
3365 emitInsn(0xea600000);
3366
3367 if (insn->op == OP_SUREDB)
3368 emitField(0x34, 1, 1);
3369 emitSUTarget();
3370
3371 // destination type
3372 switch (insn->dType) {
3373 case TYPE_S32: type = 1; break;
3374 case TYPE_U64: type = 2; break;
3375 case TYPE_F32: type = 3; break;
3376 case TYPE_S64: type = 5; break;
3377 default:
3378 assert(insn->dType == TYPE_U32);
3379 break;
3380 }
3381
3382 // atomic operation
3383 if (insn->subOp == NV50_IR_SUBOP_ATOM_CAS) {
3384 subOp = 0;
3385 } else if (insn->subOp == NV50_IR_SUBOP_ATOM_EXCH) {
3386 subOp = 8;
3387 } else {
3388 subOp = insn->subOp;
3389 }
3390
3391 emitField(0x24, 3, type);
3392 emitField(0x1d, 4, subOp);
3393 emitGPR (0x14, insn->src(1));
3394 emitGPR (0x08, insn->src(0));
3395 emitGPR (0x00, insn->def(0));
3396
3397 emitSUHandle(2);
3398 }
3399
3400 /*******************************************************************************
3401 * assembler front-end
3402 ******************************************************************************/
3403
3404 bool
emitInstruction(Instruction * i)3405 CodeEmitterGM107::emitInstruction(Instruction *i)
3406 {
3407 const unsigned int size = (writeIssueDelays && !(codeSize & 0x1f)) ? 16 : 8;
3408 bool ret = true;
3409
3410 insn = i;
3411
3412 if (insn->encSize != 8) {
3413 ERROR("skipping undecodable instruction: "); insn->print();
3414 return false;
3415 } else
3416 if (codeSize + size > codeSizeLimit) {
3417 ERROR("code emitter output buffer too small\n");
3418 return false;
3419 }
3420
3421 if (writeIssueDelays) {
3422 int n = ((codeSize & 0x1f) / 8) - 1;
3423 if (n < 0) {
3424 data = code;
3425 data[0] = 0x00000000;
3426 data[1] = 0x00000000;
3427 code += 2;
3428 codeSize += 8;
3429 n++;
3430 }
3431
3432 emitField(data, n * 21, 21, insn->sched);
3433 }
3434
3435 switch (insn->op) {
3436 case OP_EXIT:
3437 emitEXIT();
3438 break;
3439 case OP_BRA:
3440 emitBRA();
3441 break;
3442 case OP_CALL:
3443 emitCAL();
3444 break;
3445 case OP_PRECONT:
3446 emitPCNT();
3447 break;
3448 case OP_CONT:
3449 emitCONT();
3450 break;
3451 case OP_PREBREAK:
3452 emitPBK();
3453 break;
3454 case OP_BREAK:
3455 emitBRK();
3456 break;
3457 case OP_PRERET:
3458 emitPRET();
3459 break;
3460 case OP_RET:
3461 emitRET();
3462 break;
3463 case OP_JOINAT:
3464 emitSSY();
3465 break;
3466 case OP_JOIN:
3467 emitSYNC();
3468 break;
3469 case OP_QUADON:
3470 emitSAM();
3471 break;
3472 case OP_QUADPOP:
3473 emitRAM();
3474 break;
3475 case OP_MOV:
3476 emitMOV();
3477 break;
3478 case OP_RDSV:
3479 if (targGM107->isCS2RSV(insn->getSrc(0)->reg.data.sv.sv))
3480 emitCS2R();
3481 else
3482 emitS2R();
3483 break;
3484 case OP_ABS:
3485 case OP_NEG:
3486 case OP_SAT:
3487 case OP_FLOOR:
3488 case OP_CEIL:
3489 case OP_TRUNC:
3490 case OP_CVT:
3491 if (insn->op == OP_CVT && (insn->def(0).getFile() == FILE_PREDICATE ||
3492 insn->src(0).getFile() == FILE_PREDICATE)) {
3493 emitMOV();
3494 } else if (isFloatType(insn->dType)) {
3495 if (isFloatType(insn->sType))
3496 emitF2F();
3497 else
3498 emitI2F();
3499 } else {
3500 if (isFloatType(insn->sType))
3501 emitF2I();
3502 else
3503 emitI2I();
3504 }
3505 break;
3506 case OP_SHFL:
3507 emitSHFL();
3508 break;
3509 case OP_ADD:
3510 case OP_SUB:
3511 if (isFloatType(insn->dType)) {
3512 if (insn->dType == TYPE_F64)
3513 emitDADD();
3514 else
3515 emitFADD();
3516 } else {
3517 emitIADD();
3518 }
3519 break;
3520 case OP_MUL:
3521 if (isFloatType(insn->dType)) {
3522 if (insn->dType == TYPE_F64)
3523 emitDMUL();
3524 else
3525 emitFMUL();
3526 } else {
3527 emitIMUL();
3528 }
3529 break;
3530 case OP_MAD:
3531 case OP_FMA:
3532 if (isFloatType(insn->dType)) {
3533 if (insn->dType == TYPE_F64)
3534 emitDFMA();
3535 else
3536 emitFFMA();
3537 } else {
3538 emitIMAD();
3539 }
3540 break;
3541 case OP_SHLADD:
3542 emitISCADD();
3543 break;
3544 case OP_XMAD:
3545 emitXMAD();
3546 break;
3547 case OP_MIN:
3548 case OP_MAX:
3549 if (isFloatType(insn->dType)) {
3550 if (insn->dType == TYPE_F64)
3551 emitDMNMX();
3552 else
3553 emitFMNMX();
3554 } else {
3555 emitIMNMX();
3556 }
3557 break;
3558 case OP_SHL:
3559 if (typeSizeof(insn->sType) == 8)
3560 emitSHF();
3561 else
3562 emitSHL();
3563 break;
3564 case OP_SHR:
3565 if (typeSizeof(insn->sType) == 8)
3566 emitSHF();
3567 else
3568 emitSHR();
3569 break;
3570 case OP_POPCNT:
3571 emitPOPC();
3572 break;
3573 case OP_INSBF:
3574 emitBFI();
3575 break;
3576 case OP_EXTBF:
3577 emitBFE();
3578 break;
3579 case OP_BFIND:
3580 emitFLO();
3581 break;
3582 case OP_PERMT:
3583 emitPRMT();
3584 break;
3585 case OP_SLCT:
3586 if (isFloatType(insn->dType))
3587 emitFCMP();
3588 else
3589 emitICMP();
3590 break;
3591 case OP_SET:
3592 case OP_SET_AND:
3593 case OP_SET_OR:
3594 case OP_SET_XOR:
3595 if (insn->def(0).getFile() != FILE_PREDICATE) {
3596 if (isFloatType(insn->sType))
3597 if (insn->sType == TYPE_F64)
3598 emitDSET();
3599 else
3600 emitFSET();
3601 else
3602 emitISET();
3603 } else {
3604 if (isFloatType(insn->sType))
3605 if (insn->sType == TYPE_F64)
3606 emitDSETP();
3607 else
3608 emitFSETP();
3609 else
3610 emitISETP();
3611 }
3612 break;
3613 case OP_SELP:
3614 emitSEL();
3615 break;
3616 case OP_PRESIN:
3617 case OP_PREEX2:
3618 emitRRO();
3619 break;
3620 case OP_COS:
3621 case OP_SIN:
3622 case OP_EX2:
3623 case OP_LG2:
3624 case OP_RCP:
3625 case OP_RSQ:
3626 case OP_SQRT:
3627 emitMUFU();
3628 break;
3629 case OP_AND:
3630 case OP_OR:
3631 case OP_XOR:
3632 switch (insn->def(0).getFile()) {
3633 case FILE_GPR: emitLOP(); break;
3634 case FILE_PREDICATE: emitPSETP(); break;
3635 default:
3636 assert(!"invalid bool op");
3637 }
3638 break;
3639 case OP_NOT:
3640 emitNOT();
3641 break;
3642 case OP_LOAD:
3643 switch (insn->src(0).getFile()) {
3644 case FILE_MEMORY_CONST : emitLDC(); break;
3645 case FILE_MEMORY_LOCAL : emitLDL(); break;
3646 case FILE_MEMORY_SHARED: emitLDS(); break;
3647 case FILE_MEMORY_GLOBAL: emitLD(); break;
3648 default:
3649 assert(!"invalid load");
3650 emitNOP();
3651 break;
3652 }
3653 break;
3654 case OP_STORE:
3655 switch (insn->src(0).getFile()) {
3656 case FILE_MEMORY_LOCAL : emitSTL(); break;
3657 case FILE_MEMORY_SHARED: emitSTS(); break;
3658 case FILE_MEMORY_GLOBAL: emitST(); break;
3659 default:
3660 assert(!"invalid store");
3661 emitNOP();
3662 break;
3663 }
3664 break;
3665 case OP_ATOM:
3666 if (insn->src(0).getFile() == FILE_MEMORY_SHARED)
3667 emitATOMS();
3668 else
3669 if (!insn->defExists(0) && insn->subOp < NV50_IR_SUBOP_ATOM_CAS)
3670 emitRED();
3671 else
3672 emitATOM();
3673 break;
3674 case OP_CCTL:
3675 emitCCTL();
3676 break;
3677 case OP_VFETCH:
3678 emitALD();
3679 break;
3680 case OP_EXPORT:
3681 emitAST();
3682 break;
3683 case OP_PFETCH:
3684 emitISBERD();
3685 break;
3686 case OP_AFETCH:
3687 emitAL2P();
3688 break;
3689 case OP_LINTERP:
3690 case OP_PINTERP:
3691 emitIPA();
3692 break;
3693 case OP_PIXLD:
3694 emitPIXLD();
3695 break;
3696 case OP_TEX:
3697 case OP_TXL:
3698 if (insn->asTex()->tex.scalar)
3699 emitTEXS();
3700 else
3701 emitTEX();
3702 break;
3703 case OP_TXB:
3704 emitTEX();
3705 break;
3706 case OP_TXF:
3707 if (insn->asTex()->tex.scalar)
3708 emitTEXS();
3709 else
3710 emitTLD();
3711 break;
3712 case OP_TXG:
3713 if (insn->asTex()->tex.scalar)
3714 emitTEXS();
3715 else
3716 emitTLD4();
3717 break;
3718 case OP_TXD:
3719 emitTXD();
3720 break;
3721 case OP_TXQ:
3722 emitTXQ();
3723 break;
3724 case OP_TXLQ:
3725 emitTMML();
3726 break;
3727 case OP_TEXBAR:
3728 emitDEPBAR();
3729 break;
3730 case OP_QUADOP:
3731 emitFSWZADD();
3732 break;
3733 case OP_NOP:
3734 emitNOP();
3735 break;
3736 case OP_DISCARD:
3737 emitKIL();
3738 break;
3739 case OP_EMIT:
3740 case OP_RESTART:
3741 emitOUT();
3742 break;
3743 case OP_BAR:
3744 emitBAR();
3745 break;
3746 case OP_MEMBAR:
3747 emitMEMBAR();
3748 break;
3749 case OP_VOTE:
3750 emitVOTE();
3751 break;
3752 case OP_SUSTB:
3753 case OP_SUSTP:
3754 emitSUSTx();
3755 break;
3756 case OP_SULDB:
3757 case OP_SULDP:
3758 emitSULDx();
3759 break;
3760 case OP_SUREDB:
3761 case OP_SUREDP:
3762 emitSUREDx();
3763 break;
3764 default:
3765 assert(!"invalid opcode");
3766 emitNOP();
3767 ret = false;
3768 break;
3769 }
3770
3771 if (insn->join) {
3772 /*XXX*/
3773 }
3774
3775 code += 2;
3776 codeSize += 8;
3777 return ret;
3778 }
3779
3780 uint32_t
getMinEncodingSize(const Instruction * i) const3781 CodeEmitterGM107::getMinEncodingSize(const Instruction *i) const
3782 {
3783 return 8;
3784 }
3785
3786 /*******************************************************************************
3787 * sched data calculator
3788 ******************************************************************************/
3789
3790 inline void
emitStall(Instruction * insn,uint8_t cnt)3791 SchedDataCalculatorGM107::emitStall(Instruction *insn, uint8_t cnt)
3792 {
3793 assert(cnt < 16);
3794 insn->sched |= cnt;
3795 }
3796
3797 inline void
emitYield(Instruction * insn)3798 SchedDataCalculatorGM107::emitYield(Instruction *insn)
3799 {
3800 insn->sched |= 1 << 4;
3801 }
3802
3803 inline void
emitWrDepBar(Instruction * insn,uint8_t id)3804 SchedDataCalculatorGM107::emitWrDepBar(Instruction *insn, uint8_t id)
3805 {
3806 assert(id < 6);
3807 if ((insn->sched & 0xe0) == 0xe0)
3808 insn->sched ^= 0xe0;
3809 insn->sched |= id << 5;
3810 }
3811
3812 inline void
emitRdDepBar(Instruction * insn,uint8_t id)3813 SchedDataCalculatorGM107::emitRdDepBar(Instruction *insn, uint8_t id)
3814 {
3815 assert(id < 6);
3816 if ((insn->sched & 0x700) == 0x700)
3817 insn->sched ^= 0x700;
3818 insn->sched |= id << 8;
3819 }
3820
3821 inline void
emitWtDepBar(Instruction * insn,uint8_t id)3822 SchedDataCalculatorGM107::emitWtDepBar(Instruction *insn, uint8_t id)
3823 {
3824 assert(id < 6);
3825 insn->sched |= 1 << (11 + id);
3826 }
3827
3828 inline void
emitReuse(Instruction * insn,uint8_t id)3829 SchedDataCalculatorGM107::emitReuse(Instruction *insn, uint8_t id)
3830 {
3831 assert(id < 4);
3832 insn->sched |= 1 << (17 + id);
3833 }
3834
3835 inline void
printSchedInfo(int cycle,const Instruction * insn) const3836 SchedDataCalculatorGM107::printSchedInfo(int cycle,
3837 const Instruction *insn) const
3838 {
3839 uint8_t st, yl, wr, rd, wt, ru;
3840
3841 st = (insn->sched & 0x00000f) >> 0;
3842 yl = (insn->sched & 0x000010) >> 4;
3843 wr = (insn->sched & 0x0000e0) >> 5;
3844 rd = (insn->sched & 0x000700) >> 8;
3845 wt = (insn->sched & 0x01f800) >> 11;
3846 ru = (insn->sched & 0x1e0000) >> 17;
3847
3848 INFO("cycle %i, (st 0x%x, yl 0x%x, wr 0x%x, rd 0x%x, wt 0x%x, ru 0x%x)\n",
3849 cycle, st, yl, wr, rd, wt, ru);
3850 }
3851
3852 inline int
getStall(const Instruction * insn) const3853 SchedDataCalculatorGM107::getStall(const Instruction *insn) const
3854 {
3855 return insn->sched & 0xf;
3856 }
3857
3858 inline int
getWrDepBar(const Instruction * insn) const3859 SchedDataCalculatorGM107::getWrDepBar(const Instruction *insn) const
3860 {
3861 return (insn->sched & 0x0000e0) >> 5;
3862 }
3863
3864 inline int
getRdDepBar(const Instruction * insn) const3865 SchedDataCalculatorGM107::getRdDepBar(const Instruction *insn) const
3866 {
3867 return (insn->sched & 0x000700) >> 8;
3868 }
3869
3870 inline int
getWtDepBar(const Instruction * insn) const3871 SchedDataCalculatorGM107::getWtDepBar(const Instruction *insn) const
3872 {
3873 return (insn->sched & 0x01f800) >> 11;
3874 }
3875
3876 // Emit the reuse flag which allows to make use of the new memory hierarchy
3877 // introduced since Maxwell, the operand reuse cache.
3878 //
3879 // It allows to reduce bank conflicts by caching operands. Each time you issue
3880 // an instruction, that flag can tell the hw which operands are going to be
3881 // re-used by the next instruction. Note that the next instruction has to use
3882 // the same GPR id in the same operand slot.
3883 void
setReuseFlag(Instruction * insn)3884 SchedDataCalculatorGM107::setReuseFlag(Instruction *insn)
3885 {
3886 Instruction *next = insn->next;
3887 BitSet defs(255, true);
3888
3889 if (!targ->isReuseSupported(insn))
3890 return;
3891
3892 for (int d = 0; insn->defExists(d); ++d) {
3893 const Value *def = insn->def(d).rep();
3894 if (insn->def(d).getFile() != FILE_GPR)
3895 continue;
3896 if (typeSizeof(insn->dType) != 4 || def->reg.data.id == 255)
3897 continue;
3898 defs.set(def->reg.data.id);
3899 }
3900
3901 for (int s = 0; insn->srcExists(s); s++) {
3902 const Value *src = insn->src(s).rep();
3903 if (insn->src(s).getFile() != FILE_GPR)
3904 continue;
3905 if (typeSizeof(insn->sType) != 4 || src->reg.data.id == 255)
3906 continue;
3907 if (defs.test(src->reg.data.id))
3908 continue;
3909 if (!next->srcExists(s) || next->src(s).getFile() != FILE_GPR)
3910 continue;
3911 if (src->reg.data.id != next->getSrc(s)->reg.data.id)
3912 continue;
3913 assert(s < 4);
3914 emitReuse(insn, s);
3915 }
3916 }
3917
3918 void
recordWr(const Value * v,int cycle,int ready)3919 SchedDataCalculatorGM107::recordWr(const Value *v, int cycle, int ready)
3920 {
3921 int a = v->reg.data.id, b;
3922
3923 switch (v->reg.file) {
3924 case FILE_GPR:
3925 b = a + v->reg.size / 4;
3926 for (int r = a; r < b; ++r)
3927 score->rd.r[r] = ready;
3928 break;
3929 case FILE_PREDICATE:
3930 // To immediately use a predicate set by any instructions, the minimum
3931 // number of stall counts is 13.
3932 score->rd.p[a] = cycle + 13;
3933 break;
3934 case FILE_FLAGS:
3935 score->rd.c = ready;
3936 break;
3937 default:
3938 break;
3939 }
3940 }
3941
3942 void
checkRd(const Value * v,int cycle,int & delay) const3943 SchedDataCalculatorGM107::checkRd(const Value *v, int cycle, int &delay) const
3944 {
3945 int a = v->reg.data.id, b;
3946 int ready = cycle;
3947
3948 switch (v->reg.file) {
3949 case FILE_GPR:
3950 b = a + v->reg.size / 4;
3951 for (int r = a; r < b; ++r)
3952 ready = MAX2(ready, score->rd.r[r]);
3953 break;
3954 case FILE_PREDICATE:
3955 ready = MAX2(ready, score->rd.p[a]);
3956 break;
3957 case FILE_FLAGS:
3958 ready = MAX2(ready, score->rd.c);
3959 break;
3960 default:
3961 break;
3962 }
3963 if (cycle < ready)
3964 delay = MAX2(delay, ready - cycle);
3965 }
3966
3967 void
commitInsn(const Instruction * insn,int cycle)3968 SchedDataCalculatorGM107::commitInsn(const Instruction *insn, int cycle)
3969 {
3970 const int ready = cycle + targ->getLatency(insn);
3971
3972 for (int d = 0; insn->defExists(d); ++d)
3973 recordWr(insn->getDef(d), cycle, ready);
3974
3975 #ifdef GM107_DEBUG_SCHED_DATA
3976 score->print(cycle);
3977 #endif
3978 }
3979
3980 #define GM107_MIN_ISSUE_DELAY 0x1
3981 #define GM107_MAX_ISSUE_DELAY 0xf
3982
3983 int
calcDelay(const Instruction * insn,int cycle) const3984 SchedDataCalculatorGM107::calcDelay(const Instruction *insn, int cycle) const
3985 {
3986 int delay = 0, ready = cycle;
3987
3988 for (int s = 0; insn->srcExists(s); ++s)
3989 checkRd(insn->getSrc(s), cycle, delay);
3990
3991 // TODO: make use of getReadLatency()!
3992
3993 return MAX2(delay, ready - cycle);
3994 }
3995
3996 void
setDelay(Instruction * insn,int delay,const Instruction * next)3997 SchedDataCalculatorGM107::setDelay(Instruction *insn, int delay,
3998 const Instruction *next)
3999 {
4000 const OpClass cl = targ->getOpClass(insn->op);
4001 int wr, rd;
4002
4003 if (insn->op == OP_EXIT ||
4004 insn->op == OP_BAR ||
4005 insn->op == OP_MEMBAR) {
4006 delay = GM107_MAX_ISSUE_DELAY;
4007 } else
4008 if (insn->op == OP_QUADON ||
4009 insn->op == OP_QUADPOP) {
4010 delay = 0xd;
4011 } else
4012 if (cl == OPCLASS_FLOW || insn->join) {
4013 delay = 0xd;
4014 }
4015
4016 if (!next || !targ->canDualIssue(insn, next)) {
4017 delay = CLAMP(delay, GM107_MIN_ISSUE_DELAY, GM107_MAX_ISSUE_DELAY);
4018 } else {
4019 delay = 0x0; // dual-issue
4020 }
4021
4022 wr = getWrDepBar(insn);
4023 rd = getRdDepBar(insn);
4024
4025 if (delay == GM107_MIN_ISSUE_DELAY && (wr & rd) != 7) {
4026 // Barriers take one additional clock cycle to become active on top of
4027 // the clock consumed by the instruction producing it.
4028 if (!next || insn->bb != next->bb) {
4029 delay = 0x2;
4030 } else {
4031 int wt = getWtDepBar(next);
4032 if ((wt & (1 << wr)) | (wt & (1 << rd)))
4033 delay = 0x2;
4034 }
4035 }
4036
4037 emitStall(insn, delay);
4038 }
4039
4040
4041 // Return true when the given instruction needs to emit a read dependency
4042 // barrier (for WaR hazards) because it doesn't operate at a fixed latency, and
4043 // setting the maximum number of stall counts is not enough.
4044 bool
needRdDepBar(const Instruction * insn) const4045 SchedDataCalculatorGM107::needRdDepBar(const Instruction *insn) const
4046 {
4047 BitSet srcs(255, true), defs(255, true);
4048 int a, b;
4049
4050 if (!targ->isBarrierRequired(insn))
4051 return false;
4052
4053 // Do not emit a read dependency barrier when the instruction doesn't use
4054 // any GPR (like st s[0x4] 0x0) as input because it's unnecessary.
4055 for (int s = 0; insn->srcExists(s); ++s) {
4056 const Value *src = insn->src(s).rep();
4057 if (insn->src(s).getFile() != FILE_GPR)
4058 continue;
4059 if (src->reg.data.id == 255)
4060 continue;
4061
4062 a = src->reg.data.id;
4063 b = a + src->reg.size / 4;
4064 for (int r = a; r < b; ++r)
4065 srcs.set(r);
4066 }
4067
4068 if (!srcs.popCount())
4069 return false;
4070
4071 // Do not emit a read dependency barrier when the output GPRs are equal to
4072 // the input GPRs (like rcp $r0 $r0) because a write dependency barrier will
4073 // be produced and WaR hazards are prevented.
4074 for (int d = 0; insn->defExists(d); ++d) {
4075 const Value *def = insn->def(d).rep();
4076 if (insn->def(d).getFile() != FILE_GPR)
4077 continue;
4078 if (def->reg.data.id == 255)
4079 continue;
4080
4081 a = def->reg.data.id;
4082 b = a + def->reg.size / 4;
4083 for (int r = a; r < b; ++r)
4084 defs.set(r);
4085 }
4086
4087 srcs.andNot(defs);
4088 if (!srcs.popCount())
4089 return false;
4090
4091 return true;
4092 }
4093
4094 // Return true when the given instruction needs to emit a write dependency
4095 // barrier (for RaW hazards) because it doesn't operate at a fixed latency, and
4096 // setting the maximum number of stall counts is not enough. This is only legal
4097 // if the instruction output something.
4098 bool
needWrDepBar(const Instruction * insn) const4099 SchedDataCalculatorGM107::needWrDepBar(const Instruction *insn) const
4100 {
4101 if (!targ->isBarrierRequired(insn))
4102 return false;
4103
4104 for (int d = 0; insn->defExists(d); ++d) {
4105 if (insn->def(d).getFile() == FILE_GPR ||
4106 insn->def(d).getFile() == FILE_FLAGS ||
4107 insn->def(d).getFile() == FILE_PREDICATE)
4108 return true;
4109 }
4110 return false;
4111 }
4112
4113 // Helper function for findFirstUse() and findFirstDef()
4114 bool
doesInsnWriteTo(const Instruction * insn,const Value * val) const4115 SchedDataCalculatorGM107::doesInsnWriteTo(const Instruction *insn,
4116 const Value *val) const
4117 {
4118 if (val->reg.file != FILE_GPR &&
4119 val->reg.file != FILE_PREDICATE &&
4120 val->reg.file != FILE_FLAGS)
4121 return false;
4122
4123 for (int d = 0; insn->defExists(d); ++d) {
4124 const Value* def = insn->getDef(d);
4125 int minGPR = def->reg.data.id;
4126 int maxGPR = minGPR + def->reg.size / 4 - 1;
4127
4128 if (def->reg.file != val->reg.file)
4129 continue;
4130
4131 if (def->reg.file == FILE_GPR) {
4132 if (val->reg.data.id + val->reg.size / 4 - 1 < minGPR ||
4133 val->reg.data.id > maxGPR)
4134 continue;
4135 return true;
4136 } else
4137 if (def->reg.file == FILE_PREDICATE) {
4138 if (val->reg.data.id != minGPR)
4139 continue;
4140 return true;
4141 } else
4142 if (def->reg.file == FILE_FLAGS) {
4143 if (val->reg.data.id != minGPR)
4144 continue;
4145 return true;
4146 }
4147 }
4148
4149 return false;
4150 }
4151
4152 // Find the next instruction inside the same basic block which uses (reads or
4153 // writes from) the output of the given instruction in order to avoid RaW and
4154 // WaW hazards.
4155 Instruction *
findFirstUse(const Instruction * bari) const4156 SchedDataCalculatorGM107::findFirstUse(const Instruction *bari) const
4157 {
4158 Instruction *insn, *next;
4159
4160 if (!bari->defExists(0))
4161 return NULL;
4162
4163 for (insn = bari->next; insn != NULL; insn = next) {
4164 next = insn->next;
4165
4166 for (int s = 0; insn->srcExists(s); ++s)
4167 if (doesInsnWriteTo(bari, insn->getSrc(s)))
4168 return insn;
4169
4170 for (int d = 0; insn->defExists(d); ++d)
4171 if (doesInsnWriteTo(bari, insn->getDef(d)))
4172 return insn;
4173 }
4174 return NULL;
4175 }
4176
4177 // Find the next instruction inside the same basic block which overwrites, at
4178 // least, one source of the given instruction in order to avoid WaR hazards.
4179 Instruction *
findFirstDef(const Instruction * bari) const4180 SchedDataCalculatorGM107::findFirstDef(const Instruction *bari) const
4181 {
4182 Instruction *insn, *next;
4183
4184 if (!bari->srcExists(0))
4185 return NULL;
4186
4187 for (insn = bari->next; insn != NULL; insn = next) {
4188 next = insn->next;
4189
4190 for (int s = 0; bari->srcExists(s); ++s)
4191 if (doesInsnWriteTo(insn, bari->getSrc(s)))
4192 return insn;
4193 }
4194 return NULL;
4195 }
4196
4197 // Dependency barriers:
4198 // This pass is a bit ugly and could probably be improved by performing a
4199 // better allocation.
4200 //
4201 // The main idea is to avoid WaR and RaW hazards by emitting read/write
4202 // dependency barriers using the control codes.
4203 bool
insertBarriers(BasicBlock * bb)4204 SchedDataCalculatorGM107::insertBarriers(BasicBlock *bb)
4205 {
4206 std::list<LiveBarUse> live_uses;
4207 std::list<LiveBarDef> live_defs;
4208 Instruction *insn, *next;
4209 BitSet bars(6, true);
4210 int bar_id;
4211
4212 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4213 Instruction *usei = NULL, *defi = NULL;
4214 bool need_wr_bar, need_rd_bar;
4215
4216 next = insn->next;
4217
4218 // Expire old barrier uses.
4219 for (std::list<LiveBarUse>::iterator it = live_uses.begin();
4220 it != live_uses.end();) {
4221 if (insn->serial >= it->usei->serial) {
4222 int wr = getWrDepBar(it->insn);
4223 emitWtDepBar(insn, wr);
4224 bars.clr(wr); // free barrier
4225 it = live_uses.erase(it);
4226 continue;
4227 }
4228 ++it;
4229 }
4230
4231 // Expire old barrier defs.
4232 for (std::list<LiveBarDef>::iterator it = live_defs.begin();
4233 it != live_defs.end();) {
4234 if (insn->serial >= it->defi->serial) {
4235 int rd = getRdDepBar(it->insn);
4236 emitWtDepBar(insn, rd);
4237 bars.clr(rd); // free barrier
4238 it = live_defs.erase(it);
4239 continue;
4240 }
4241 ++it;
4242 }
4243
4244 need_wr_bar = needWrDepBar(insn);
4245 need_rd_bar = needRdDepBar(insn);
4246
4247 if (need_wr_bar) {
4248 // When the instruction requires to emit a write dependency barrier
4249 // (all which write something at a variable latency), find the next
4250 // instruction which reads the outputs (or writes to them, potentially
4251 // completing before this insn.
4252 usei = findFirstUse(insn);
4253
4254 // Allocate and emit a new barrier.
4255 bar_id = bars.findFreeRange(1);
4256 if (bar_id == -1)
4257 bar_id = 5;
4258 bars.set(bar_id);
4259 emitWrDepBar(insn, bar_id);
4260 if (usei)
4261 live_uses.push_back(LiveBarUse(insn, usei));
4262 }
4263
4264 if (need_rd_bar) {
4265 // When the instruction requires to emit a read dependency barrier
4266 // (all which read something at a variable latency), find the next
4267 // instruction which will write the inputs.
4268 defi = findFirstDef(insn);
4269
4270 if (usei && defi && usei->serial <= defi->serial)
4271 continue;
4272
4273 // Allocate and emit a new barrier.
4274 bar_id = bars.findFreeRange(1);
4275 if (bar_id == -1)
4276 bar_id = 5;
4277 bars.set(bar_id);
4278 emitRdDepBar(insn, bar_id);
4279 if (defi)
4280 live_defs.push_back(LiveBarDef(insn, defi));
4281 }
4282 }
4283
4284 // Remove unnecessary barrier waits.
4285 BitSet alive_bars(6, true);
4286 for (insn = bb->getEntry(); insn != NULL; insn = next) {
4287 int wr, rd, wt;
4288
4289 next = insn->next;
4290
4291 wr = getWrDepBar(insn);
4292 rd = getRdDepBar(insn);
4293 wt = getWtDepBar(insn);
4294
4295 for (int idx = 0; idx < 6; ++idx) {
4296 if (!(wt & (1 << idx)))
4297 continue;
4298 if (!alive_bars.test(idx)) {
4299 insn->sched &= ~(1 << (11 + idx));
4300 } else {
4301 alive_bars.clr(idx);
4302 }
4303 }
4304
4305 if (wr < 6)
4306 alive_bars.set(wr);
4307 if (rd < 6)
4308 alive_bars.set(rd);
4309 }
4310
4311 return true;
4312 }
4313
4314 bool
visit(Function * func)4315 SchedDataCalculatorGM107::visit(Function *func)
4316 {
4317 ArrayList insns;
4318
4319 func->orderInstructions(insns);
4320
4321 scoreBoards.resize(func->cfg.getSize());
4322 for (size_t i = 0; i < scoreBoards.size(); ++i)
4323 scoreBoards[i].wipe();
4324 return true;
4325 }
4326
4327 bool
visit(BasicBlock * bb)4328 SchedDataCalculatorGM107::visit(BasicBlock *bb)
4329 {
4330 Instruction *insn, *next = NULL;
4331 int cycle = 0;
4332
4333 for (Instruction *insn = bb->getEntry(); insn; insn = insn->next) {
4334 /*XXX*/
4335 insn->sched = 0x7e0;
4336 }
4337
4338 if (!debug_get_bool_option("NV50_PROG_SCHED", true))
4339 return true;
4340
4341 // Insert read/write dependency barriers for instructions which don't
4342 // operate at a fixed latency.
4343 insertBarriers(bb);
4344
4345 score = &scoreBoards.at(bb->getId());
4346
4347 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) {
4348 // back branches will wait until all target dependencies are satisfied
4349 if (ei.getType() == Graph::Edge::BACK) // sched would be uninitialized
4350 continue;
4351 BasicBlock *in = BasicBlock::get(ei.getNode());
4352 score->setMax(&scoreBoards.at(in->getId()));
4353 }
4354
4355 #ifdef GM107_DEBUG_SCHED_DATA
4356 INFO("=== BB:%i initial scores\n", bb->getId());
4357 score->print(cycle);
4358 #endif
4359
4360 // Because barriers are allocated locally (intra-BB), we have to make sure
4361 // that all produced barriers have been consumed before entering inside a
4362 // new basic block. The best way is to do a global allocation pre RA but
4363 // it's really more difficult, especially because of the phi nodes. Anyways,
4364 // it seems like that waiting on a barrier which has already been consumed
4365 // doesn't add any additional cost, it's just not elegant!
4366 Instruction *start = bb->getEntry();
4367 if (start && bb->cfg.incidentCount() > 0) {
4368 for (int b = 0; b < 6; b++)
4369 emitWtDepBar(start, b);
4370 }
4371
4372 for (insn = bb->getEntry(); insn && insn->next; insn = insn->next) {
4373 next = insn->next;
4374
4375 commitInsn(insn, cycle);
4376 int delay = calcDelay(next, cycle);
4377 setDelay(insn, delay, next);
4378 cycle += getStall(insn);
4379
4380 setReuseFlag(insn);
4381
4382 // XXX: The yield flag seems to destroy a bunch of things when it is
4383 // set on every instruction, need investigation.
4384 //emitYield(insn);
4385
4386 #ifdef GM107_DEBUG_SCHED_DATA
4387 printSchedInfo(cycle, insn);
4388 insn->print();
4389 next->print();
4390 #endif
4391 }
4392
4393 if (!insn)
4394 return true;
4395 commitInsn(insn, cycle);
4396
4397 int bbDelay = -1;
4398
4399 #ifdef GM107_DEBUG_SCHED_DATA
4400 fprintf(stderr, "last instruction is : ");
4401 insn->print();
4402 fprintf(stderr, "cycle=%d\n", cycle);
4403 #endif
4404
4405 for (Graph::EdgeIterator ei = bb->cfg.outgoing(); !ei.end(); ei.next()) {
4406 BasicBlock *out = BasicBlock::get(ei.getNode());
4407
4408 if (ei.getType() != Graph::Edge::BACK) {
4409 // Only test the first instruction of the outgoing block.
4410 next = out->getEntry();
4411 if (next) {
4412 bbDelay = MAX2(bbDelay, calcDelay(next, cycle));
4413 } else {
4414 // When the outgoing BB is empty, make sure to set the number of
4415 // stall counts needed by the instruction because we don't know the
4416 // next instruction.
4417 bbDelay = MAX2(bbDelay, targ->getLatency(insn));
4418 }
4419 } else {
4420 // Wait until all dependencies are satisfied.
4421 const int regsFree = score->getLatest();
4422 next = out->getFirst();
4423 for (int c = cycle; next && c < regsFree; next = next->next) {
4424 bbDelay = MAX2(bbDelay, calcDelay(next, c));
4425 c += getStall(next);
4426 }
4427 next = NULL;
4428 }
4429 }
4430 if (bb->cfg.outgoingCount() != 1)
4431 next = NULL;
4432 setDelay(insn, bbDelay, next);
4433 cycle += getStall(insn);
4434
4435 score->rebase(cycle); // common base for initializing out blocks' scores
4436 return true;
4437 }
4438
4439 /*******************************************************************************
4440 * main
4441 ******************************************************************************/
4442
4443 void
prepareEmission(Function * func)4444 CodeEmitterGM107::prepareEmission(Function *func)
4445 {
4446 SchedDataCalculatorGM107 sched(targGM107);
4447 CodeEmitter::prepareEmission(func);
4448 sched.run(func, true, true);
4449 }
4450
// Number of 24-byte groups needed to hold `size` bytes, rounded up
// (0 -> 0, 1..24 -> 1, 25..48 -> 2, ...).
//
// Written as quotient plus remainder test instead of (size + 23) / 24 so that
// sizes close to UINT32_MAX do not wrap around and yield a tiny result.
static inline uint32_t sizeToBundlesGM107(uint32_t size)
{
   const uint32_t bundleBytes = 24;

   return size / bundleBytes + (size % bundleBytes != 0 ? 1 : 0);
}
4455
4456 void
prepareEmission(Program * prog)4457 CodeEmitterGM107::prepareEmission(Program *prog)
4458 {
4459 for (ArrayList::Iterator fi = prog->allFuncs.iterator();
4460 !fi.end(); fi.next()) {
4461 Function *func = reinterpret_cast<Function *>(fi.get());
4462 func->binPos = prog->binSize;
4463 prepareEmission(func);
4464
4465 // adjust sizes & positions for schedulding info:
4466 if (prog->getTarget()->hasSWSched) {
4467 uint32_t adjPos = func->binPos;
4468 BasicBlock *bb = NULL;
4469 for (int i = 0; i < func->bbCount; ++i) {
4470 bb = func->bbArray[i];
4471 int32_t adjSize = bb->binSize;
4472 if (adjPos % 32) {
4473 adjSize -= 32 - adjPos % 32;
4474 if (adjSize < 0)
4475 adjSize = 0;
4476 }
4477 adjSize = bb->binSize + sizeToBundlesGM107(adjSize) * 8;
4478 bb->binPos = adjPos;
4479 bb->binSize = adjSize;
4480 adjPos += adjSize;
4481 }
4482 if (bb)
4483 func->binSize = adjPos - func->binPos;
4484 }
4485
4486 prog->binSize += func->binSize;
4487 }
4488 }
4489
CodeEmitterGM107(const TargetGM107 * target)4490 CodeEmitterGM107::CodeEmitterGM107(const TargetGM107 *target)
4491 : CodeEmitter(target),
4492 targGM107(target),
4493 progType(Program::TYPE_VERTEX),
4494 insn(NULL),
4495 writeIssueDelays(target->hasSWSched),
4496 data(NULL)
4497 {
4498 code = NULL;
4499 codeSize = codeSizeLimit = 0;
4500 relocInfo = NULL;
4501 }
4502
4503 CodeEmitter *
createCodeEmitterGM107(Program::Type type)4504 TargetGM107::createCodeEmitterGM107(Program::Type type)
4505 {
4506 CodeEmitterGM107 *emit = new CodeEmitterGM107(this);
4507 emit->setProgramType(type);
4508 return emit;
4509 }
4510
4511 } // namespace nv50_ir
4512