1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9
10 #include "MCTargetDesc/X86BaseInfo.h"
11 #include "X86AsmInstrumentation.h"
12 #include "X86AsmParserCommon.h"
13 #include "X86Operand.h"
14 #include "llvm/ADT/STLExtras.h"
15 #include "llvm/ADT/SmallString.h"
16 #include "llvm/ADT/SmallVector.h"
17 #include "llvm/ADT/StringSwitch.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/MC/MCContext.h"
20 #include "llvm/MC/MCExpr.h"
21 #include "llvm/MC/MCInst.h"
22 #include "llvm/MC/MCInstrInfo.h"
23 #include "llvm/MC/MCParser/MCAsmLexer.h"
24 #include "llvm/MC/MCParser/MCAsmParser.h"
25 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
26 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
27 #include "llvm/MC/MCRegisterInfo.h"
28 #include "llvm/MC/MCSection.h"
29 #include "llvm/MC/MCStreamer.h"
30 #include "llvm/MC/MCSubtargetInfo.h"
31 #include "llvm/MC/MCSymbol.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetRegistry.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include <algorithm>
36 #include <memory>
37
38 using namespace llvm;
39
40 namespace {
41
/// Binding strength of each infix-calculator token, indexed by the token's
/// InfixCalculatorTok value.  A larger number binds more tightly; tokens that
/// share a number are treated as equal precedence by
/// InfixCalculator::pushOperator().
static const char OpPrecedence[] = {
    /* IC_OR       */ 0,
    /* IC_XOR      */ 1,
    /* IC_AND      */ 2,
    /* IC_LSHIFT   */ 3,
    /* IC_RSHIFT   */ 3,
    /* IC_PLUS     */ 4,
    /* IC_MINUS    */ 4,
    /* IC_MULTIPLY */ 5,
    /* IC_DIVIDE   */ 5,
    /* IC_RPAREN   */ 6,
    /* IC_LPAREN   */ 7,
    /* IC_IMM      */ 0,
    /* IC_REGISTER */ 0
};
57
58 class X86AsmParser : public MCTargetAsmParser {
59 const MCInstrInfo &MII;
60 ParseInstructionInfo *InstInfo;
61 std::unique_ptr<X86AsmInstrumentation> Instrumentation;
62
63 private:
consumeToken()64 SMLoc consumeToken() {
65 MCAsmParser &Parser = getParser();
66 SMLoc Result = Parser.getTok().getLoc();
67 Parser.Lex();
68 return Result;
69 }
70
/// Tokens understood by InfixCalculator.  The numeric values double as
/// indices into the OpPrecedence table, so the order here must match it.
enum InfixCalculatorTok {
  // Binary operators.
  IC_OR       = 0,
  IC_XOR      = 1,
  IC_AND      = 2,
  IC_LSHIFT   = 3,
  IC_RSHIFT   = 4,
  IC_PLUS     = 5,
  IC_MINUS    = 6,
  IC_MULTIPLY = 7,
  IC_DIVIDE   = 8,
  // Grouping.
  IC_RPAREN   = 9,
  IC_LPAREN   = 10,
  // Operands.
  IC_IMM      = 11,
  IC_REGISTER = 12
};
86
87 class InfixCalculator {
88 typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
89 SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
90 SmallVector<ICToken, 4> PostfixStack;
91
92 public:
popOperand()93 int64_t popOperand() {
94 assert (!PostfixStack.empty() && "Poped an empty stack!");
95 ICToken Op = PostfixStack.pop_back_val();
96 assert ((Op.first == IC_IMM || Op.first == IC_REGISTER)
97 && "Expected and immediate or register!");
98 return Op.second;
99 }
pushOperand(InfixCalculatorTok Op,int64_t Val=0)100 void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
101 assert ((Op == IC_IMM || Op == IC_REGISTER) &&
102 "Unexpected operand!");
103 PostfixStack.push_back(std::make_pair(Op, Val));
104 }
105
popOperator()106 void popOperator() { InfixOperatorStack.pop_back(); }
pushOperator(InfixCalculatorTok Op)107 void pushOperator(InfixCalculatorTok Op) {
108 // Push the new operator if the stack is empty.
109 if (InfixOperatorStack.empty()) {
110 InfixOperatorStack.push_back(Op);
111 return;
112 }
113
114 // Push the new operator if it has a higher precedence than the operator
115 // on the top of the stack or the operator on the top of the stack is a
116 // left parentheses.
117 unsigned Idx = InfixOperatorStack.size() - 1;
118 InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
119 if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
120 InfixOperatorStack.push_back(Op);
121 return;
122 }
123
124 // The operator on the top of the stack has higher precedence than the
125 // new operator.
126 unsigned ParenCount = 0;
127 while (1) {
128 // Nothing to process.
129 if (InfixOperatorStack.empty())
130 break;
131
132 Idx = InfixOperatorStack.size() - 1;
133 StackOp = InfixOperatorStack[Idx];
134 if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
135 break;
136
137 // If we have an even parentheses count and we see a left parentheses,
138 // then stop processing.
139 if (!ParenCount && StackOp == IC_LPAREN)
140 break;
141
142 if (StackOp == IC_RPAREN) {
143 ++ParenCount;
144 InfixOperatorStack.pop_back();
145 } else if (StackOp == IC_LPAREN) {
146 --ParenCount;
147 InfixOperatorStack.pop_back();
148 } else {
149 InfixOperatorStack.pop_back();
150 PostfixStack.push_back(std::make_pair(StackOp, 0));
151 }
152 }
153 // Push the new operator.
154 InfixOperatorStack.push_back(Op);
155 }
156
execute()157 int64_t execute() {
158 // Push any remaining operators onto the postfix stack.
159 while (!InfixOperatorStack.empty()) {
160 InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
161 if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
162 PostfixStack.push_back(std::make_pair(StackOp, 0));
163 }
164
165 if (PostfixStack.empty())
166 return 0;
167
168 SmallVector<ICToken, 16> OperandStack;
169 for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
170 ICToken Op = PostfixStack[i];
171 if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
172 OperandStack.push_back(Op);
173 } else {
174 assert (OperandStack.size() > 1 && "Too few operands.");
175 int64_t Val;
176 ICToken Op2 = OperandStack.pop_back_val();
177 ICToken Op1 = OperandStack.pop_back_val();
178 switch (Op.first) {
179 default:
180 report_fatal_error("Unexpected operator!");
181 break;
182 case IC_PLUS:
183 Val = Op1.second + Op2.second;
184 OperandStack.push_back(std::make_pair(IC_IMM, Val));
185 break;
186 case IC_MINUS:
187 Val = Op1.second - Op2.second;
188 OperandStack.push_back(std::make_pair(IC_IMM, Val));
189 break;
190 case IC_MULTIPLY:
191 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
192 "Multiply operation with an immediate and a register!");
193 Val = Op1.second * Op2.second;
194 OperandStack.push_back(std::make_pair(IC_IMM, Val));
195 break;
196 case IC_DIVIDE:
197 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
198 "Divide operation with an immediate and a register!");
199 assert (Op2.second != 0 && "Division by zero!");
200 Val = Op1.second / Op2.second;
201 OperandStack.push_back(std::make_pair(IC_IMM, Val));
202 break;
203 case IC_OR:
204 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
205 "Or operation with an immediate and a register!");
206 Val = Op1.second | Op2.second;
207 OperandStack.push_back(std::make_pair(IC_IMM, Val));
208 break;
209 case IC_XOR:
210 assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
211 "Xor operation with an immediate and a register!");
212 Val = Op1.second ^ Op2.second;
213 OperandStack.push_back(std::make_pair(IC_IMM, Val));
214 break;
215 case IC_AND:
216 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
217 "And operation with an immediate and a register!");
218 Val = Op1.second & Op2.second;
219 OperandStack.push_back(std::make_pair(IC_IMM, Val));
220 break;
221 case IC_LSHIFT:
222 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
223 "Left shift operation with an immediate and a register!");
224 Val = Op1.second << Op2.second;
225 OperandStack.push_back(std::make_pair(IC_IMM, Val));
226 break;
227 case IC_RSHIFT:
228 assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
229 "Right shift operation with an immediate and a register!");
230 Val = Op1.second >> Op2.second;
231 OperandStack.push_back(std::make_pair(IC_IMM, Val));
232 break;
233 }
234 }
235 }
236 assert (OperandStack.size() == 1 && "Expected a single result.");
237 return OperandStack.pop_back_val().second;
238 }
239 };
240
/// States of IntelExprStateMachine; each one names the kind of token that
/// was consumed most recently.
enum IntelExprState {
  // Operators.
  IES_OR = 0,
  IES_XOR,
  IES_AND,
  IES_LSHIFT,
  IES_RSHIFT,
  IES_PLUS,
  IES_MINUS,
  IES_NOT,
  IES_MULTIPLY,
  IES_DIVIDE,
  // Brackets and parentheses.
  IES_LBRAC,
  IES_RBRAC,
  IES_LPAREN,
  IES_RPAREN,
  // Operands.
  IES_REGISTER,
  IES_INTEGER,
  IES_IDENTIFIER,
  // The token sequence was not accepted.
  IES_ERROR
};
261
262 class IntelExprStateMachine {
263 IntelExprState State, PrevState;
264 unsigned BaseReg, IndexReg, TmpReg, Scale;
265 int64_t Imm;
266 const MCExpr *Sym;
267 StringRef SymName;
268 bool StopOnLBrac, AddImmPrefix;
269 InfixCalculator IC;
270 InlineAsmIdentifierInfo Info;
271
272 public:
IntelExprStateMachine(int64_t imm,bool stoponlbrac,bool addimmprefix)273 IntelExprStateMachine(int64_t imm, bool stoponlbrac, bool addimmprefix) :
274 State(IES_PLUS), PrevState(IES_ERROR), BaseReg(0), IndexReg(0), TmpReg(0),
275 Scale(1), Imm(imm), Sym(nullptr), StopOnLBrac(stoponlbrac),
276 AddImmPrefix(addimmprefix) { Info.clear(); }
277
getBaseReg()278 unsigned getBaseReg() { return BaseReg; }
getIndexReg()279 unsigned getIndexReg() { return IndexReg; }
getScale()280 unsigned getScale() { return Scale; }
getSym()281 const MCExpr *getSym() { return Sym; }
getSymName()282 StringRef getSymName() { return SymName; }
getImm()283 int64_t getImm() { return Imm + IC.execute(); }
isValidEndState()284 bool isValidEndState() {
285 return State == IES_RBRAC || State == IES_INTEGER;
286 }
getStopOnLBrac()287 bool getStopOnLBrac() { return StopOnLBrac; }
getAddImmPrefix()288 bool getAddImmPrefix() { return AddImmPrefix; }
hadError()289 bool hadError() { return State == IES_ERROR; }
290
getIdentifierInfo()291 InlineAsmIdentifierInfo &getIdentifierInfo() {
292 return Info;
293 }
294
onOr()295 void onOr() {
296 IntelExprState CurrState = State;
297 switch (State) {
298 default:
299 State = IES_ERROR;
300 break;
301 case IES_INTEGER:
302 case IES_RPAREN:
303 case IES_REGISTER:
304 State = IES_OR;
305 IC.pushOperator(IC_OR);
306 break;
307 }
308 PrevState = CurrState;
309 }
onXor()310 void onXor() {
311 IntelExprState CurrState = State;
312 switch (State) {
313 default:
314 State = IES_ERROR;
315 break;
316 case IES_INTEGER:
317 case IES_RPAREN:
318 case IES_REGISTER:
319 State = IES_XOR;
320 IC.pushOperator(IC_XOR);
321 break;
322 }
323 PrevState = CurrState;
324 }
onAnd()325 void onAnd() {
326 IntelExprState CurrState = State;
327 switch (State) {
328 default:
329 State = IES_ERROR;
330 break;
331 case IES_INTEGER:
332 case IES_RPAREN:
333 case IES_REGISTER:
334 State = IES_AND;
335 IC.pushOperator(IC_AND);
336 break;
337 }
338 PrevState = CurrState;
339 }
onLShift()340 void onLShift() {
341 IntelExprState CurrState = State;
342 switch (State) {
343 default:
344 State = IES_ERROR;
345 break;
346 case IES_INTEGER:
347 case IES_RPAREN:
348 case IES_REGISTER:
349 State = IES_LSHIFT;
350 IC.pushOperator(IC_LSHIFT);
351 break;
352 }
353 PrevState = CurrState;
354 }
onRShift()355 void onRShift() {
356 IntelExprState CurrState = State;
357 switch (State) {
358 default:
359 State = IES_ERROR;
360 break;
361 case IES_INTEGER:
362 case IES_RPAREN:
363 case IES_REGISTER:
364 State = IES_RSHIFT;
365 IC.pushOperator(IC_RSHIFT);
366 break;
367 }
368 PrevState = CurrState;
369 }
onPlus()370 void onPlus() {
371 IntelExprState CurrState = State;
372 switch (State) {
373 default:
374 State = IES_ERROR;
375 break;
376 case IES_INTEGER:
377 case IES_RPAREN:
378 case IES_REGISTER:
379 State = IES_PLUS;
380 IC.pushOperator(IC_PLUS);
381 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
382 // If we already have a BaseReg, then assume this is the IndexReg with
383 // a scale of 1.
384 if (!BaseReg) {
385 BaseReg = TmpReg;
386 } else {
387 assert (!IndexReg && "BaseReg/IndexReg already set!");
388 IndexReg = TmpReg;
389 Scale = 1;
390 }
391 }
392 break;
393 }
394 PrevState = CurrState;
395 }
onMinus()396 void onMinus() {
397 IntelExprState CurrState = State;
398 switch (State) {
399 default:
400 State = IES_ERROR;
401 break;
402 case IES_PLUS:
403 case IES_NOT:
404 case IES_MULTIPLY:
405 case IES_DIVIDE:
406 case IES_LPAREN:
407 case IES_RPAREN:
408 case IES_LBRAC:
409 case IES_RBRAC:
410 case IES_INTEGER:
411 case IES_REGISTER:
412 State = IES_MINUS;
413 // Only push the minus operator if it is not a unary operator.
414 if (!(CurrState == IES_PLUS || CurrState == IES_MINUS ||
415 CurrState == IES_MULTIPLY || CurrState == IES_DIVIDE ||
416 CurrState == IES_LPAREN || CurrState == IES_LBRAC))
417 IC.pushOperator(IC_MINUS);
418 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
419 // If we already have a BaseReg, then assume this is the IndexReg with
420 // a scale of 1.
421 if (!BaseReg) {
422 BaseReg = TmpReg;
423 } else {
424 assert (!IndexReg && "BaseReg/IndexReg already set!");
425 IndexReg = TmpReg;
426 Scale = 1;
427 }
428 }
429 break;
430 }
431 PrevState = CurrState;
432 }
onNot()433 void onNot() {
434 IntelExprState CurrState = State;
435 switch (State) {
436 default:
437 State = IES_ERROR;
438 break;
439 case IES_PLUS:
440 case IES_NOT:
441 State = IES_NOT;
442 break;
443 }
444 PrevState = CurrState;
445 }
onRegister(unsigned Reg)446 void onRegister(unsigned Reg) {
447 IntelExprState CurrState = State;
448 switch (State) {
449 default:
450 State = IES_ERROR;
451 break;
452 case IES_PLUS:
453 case IES_LPAREN:
454 State = IES_REGISTER;
455 TmpReg = Reg;
456 IC.pushOperand(IC_REGISTER);
457 break;
458 case IES_MULTIPLY:
459 // Index Register - Scale * Register
460 if (PrevState == IES_INTEGER) {
461 assert (!IndexReg && "IndexReg already set!");
462 State = IES_REGISTER;
463 IndexReg = Reg;
464 // Get the scale and replace the 'Scale * Register' with '0'.
465 Scale = IC.popOperand();
466 IC.pushOperand(IC_IMM);
467 IC.popOperator();
468 } else {
469 State = IES_ERROR;
470 }
471 break;
472 }
473 PrevState = CurrState;
474 }
onIdentifierExpr(const MCExpr * SymRef,StringRef SymRefName)475 void onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName) {
476 PrevState = State;
477 switch (State) {
478 default:
479 State = IES_ERROR;
480 break;
481 case IES_PLUS:
482 case IES_MINUS:
483 case IES_NOT:
484 State = IES_INTEGER;
485 Sym = SymRef;
486 SymName = SymRefName;
487 IC.pushOperand(IC_IMM);
488 break;
489 }
490 }
onInteger(int64_t TmpInt,StringRef & ErrMsg)491 bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
492 IntelExprState CurrState = State;
493 switch (State) {
494 default:
495 State = IES_ERROR;
496 break;
497 case IES_PLUS:
498 case IES_MINUS:
499 case IES_NOT:
500 case IES_OR:
501 case IES_XOR:
502 case IES_AND:
503 case IES_LSHIFT:
504 case IES_RSHIFT:
505 case IES_DIVIDE:
506 case IES_MULTIPLY:
507 case IES_LPAREN:
508 State = IES_INTEGER;
509 if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
510 // Index Register - Register * Scale
511 assert (!IndexReg && "IndexReg already set!");
512 IndexReg = TmpReg;
513 Scale = TmpInt;
514 if(Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
515 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
516 return true;
517 }
518 // Get the scale and replace the 'Register * Scale' with '0'.
519 IC.popOperator();
520 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
521 PrevState == IES_OR || PrevState == IES_AND ||
522 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
523 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
524 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
525 PrevState == IES_NOT || PrevState == IES_XOR) &&
526 CurrState == IES_MINUS) {
527 // Unary minus. No need to pop the minus operand because it was never
528 // pushed.
529 IC.pushOperand(IC_IMM, -TmpInt); // Push -Imm.
530 } else if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
531 PrevState == IES_OR || PrevState == IES_AND ||
532 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
533 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
534 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
535 PrevState == IES_NOT || PrevState == IES_XOR) &&
536 CurrState == IES_NOT) {
537 // Unary not. No need to pop the not operand because it was never
538 // pushed.
539 IC.pushOperand(IC_IMM, ~TmpInt); // Push ~Imm.
540 } else {
541 IC.pushOperand(IC_IMM, TmpInt);
542 }
543 break;
544 }
545 PrevState = CurrState;
546 return false;
547 }
onStar()548 void onStar() {
549 PrevState = State;
550 switch (State) {
551 default:
552 State = IES_ERROR;
553 break;
554 case IES_INTEGER:
555 case IES_REGISTER:
556 case IES_RPAREN:
557 State = IES_MULTIPLY;
558 IC.pushOperator(IC_MULTIPLY);
559 break;
560 }
561 }
onDivide()562 void onDivide() {
563 PrevState = State;
564 switch (State) {
565 default:
566 State = IES_ERROR;
567 break;
568 case IES_INTEGER:
569 case IES_RPAREN:
570 State = IES_DIVIDE;
571 IC.pushOperator(IC_DIVIDE);
572 break;
573 }
574 }
onLBrac()575 void onLBrac() {
576 PrevState = State;
577 switch (State) {
578 default:
579 State = IES_ERROR;
580 break;
581 case IES_RBRAC:
582 State = IES_PLUS;
583 IC.pushOperator(IC_PLUS);
584 break;
585 }
586 }
onRBrac()587 void onRBrac() {
588 IntelExprState CurrState = State;
589 switch (State) {
590 default:
591 State = IES_ERROR;
592 break;
593 case IES_INTEGER:
594 case IES_REGISTER:
595 case IES_RPAREN:
596 State = IES_RBRAC;
597 if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
598 // If we already have a BaseReg, then assume this is the IndexReg with
599 // a scale of 1.
600 if (!BaseReg) {
601 BaseReg = TmpReg;
602 } else {
603 assert (!IndexReg && "BaseReg/IndexReg already set!");
604 IndexReg = TmpReg;
605 Scale = 1;
606 }
607 }
608 break;
609 }
610 PrevState = CurrState;
611 }
onLParen()612 void onLParen() {
613 IntelExprState CurrState = State;
614 switch (State) {
615 default:
616 State = IES_ERROR;
617 break;
618 case IES_PLUS:
619 case IES_MINUS:
620 case IES_NOT:
621 case IES_OR:
622 case IES_XOR:
623 case IES_AND:
624 case IES_LSHIFT:
625 case IES_RSHIFT:
626 case IES_MULTIPLY:
627 case IES_DIVIDE:
628 case IES_LPAREN:
629 // FIXME: We don't handle this type of unary minus or not, yet.
630 if ((PrevState == IES_PLUS || PrevState == IES_MINUS ||
631 PrevState == IES_OR || PrevState == IES_AND ||
632 PrevState == IES_LSHIFT || PrevState == IES_RSHIFT ||
633 PrevState == IES_MULTIPLY || PrevState == IES_DIVIDE ||
634 PrevState == IES_LPAREN || PrevState == IES_LBRAC ||
635 PrevState == IES_NOT || PrevState == IES_XOR) &&
636 (CurrState == IES_MINUS || CurrState == IES_NOT)) {
637 State = IES_ERROR;
638 break;
639 }
640 State = IES_LPAREN;
641 IC.pushOperator(IC_LPAREN);
642 break;
643 }
644 PrevState = CurrState;
645 }
onRParen()646 void onRParen() {
647 PrevState = State;
648 switch (State) {
649 default:
650 State = IES_ERROR;
651 break;
652 case IES_INTEGER:
653 case IES_REGISTER:
654 case IES_RPAREN:
655 State = IES_RPAREN;
656 IC.pushOperator(IC_RPAREN);
657 break;
658 }
659 }
660 };
661
Error(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)662 bool Error(SMLoc L, const Twine &Msg,
663 ArrayRef<SMRange> Ranges = None,
664 bool MatchingInlineAsm = false) {
665 MCAsmParser &Parser = getParser();
666 if (MatchingInlineAsm) return true;
667 return Parser.Error(L, Msg, Ranges);
668 }
669
ErrorAndEatStatement(SMLoc L,const Twine & Msg,ArrayRef<SMRange> Ranges=None,bool MatchingInlineAsm=false)670 bool ErrorAndEatStatement(SMLoc L, const Twine &Msg,
671 ArrayRef<SMRange> Ranges = None,
672 bool MatchingInlineAsm = false) {
673 MCAsmParser &Parser = getParser();
674 Parser.eatToEndOfStatement();
675 return Error(L, Msg, Ranges, MatchingInlineAsm);
676 }
677
ErrorOperand(SMLoc Loc,StringRef Msg)678 std::nullptr_t ErrorOperand(SMLoc Loc, StringRef Msg) {
679 Error(Loc, Msg);
680 return nullptr;
681 }
682
683 std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
684 std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
685 bool IsSIReg(unsigned Reg);
686 unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
687 void
688 AddDefaultSrcDestOperands(OperandVector &Operands,
689 std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
690 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
691 bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
692 OperandVector &FinalOperands);
693 std::unique_ptr<X86Operand> ParseOperand();
694 std::unique_ptr<X86Operand> ParseATTOperand();
695 std::unique_ptr<X86Operand> ParseIntelOperand();
696 std::unique_ptr<X86Operand> ParseIntelOffsetOfOperator();
697 bool ParseIntelDotOperator(const MCExpr *Disp, const MCExpr *&NewDisp);
698 std::unique_ptr<X86Operand> ParseIntelOperator(unsigned OpKind);
699 std::unique_ptr<X86Operand>
700 ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start, unsigned Size);
701 std::unique_ptr<X86Operand>
702 ParseIntelMemOperand(int64_t ImmDisp, SMLoc StartLoc, unsigned Size);
703 std::unique_ptr<X86Operand> ParseRoundingModeOp(SMLoc Start, SMLoc End);
704 bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
705 std::unique_ptr<X86Operand> ParseIntelBracExpression(unsigned SegReg,
706 SMLoc Start,
707 int64_t ImmDisp,
708 unsigned Size);
709 bool ParseIntelIdentifier(const MCExpr *&Val, StringRef &Identifier,
710 InlineAsmIdentifierInfo &Info,
711 bool IsUnevaluatedOperand, SMLoc &End);
712
713 std::unique_ptr<X86Operand> ParseMemOperand(unsigned SegReg, SMLoc StartLoc);
714
715 std::unique_ptr<X86Operand>
716 CreateMemForInlineAsm(unsigned SegReg, const MCExpr *Disp, unsigned BaseReg,
717 unsigned IndexReg, unsigned Scale, SMLoc Start,
718 SMLoc End, unsigned Size, StringRef Identifier,
719 InlineAsmIdentifierInfo &Info);
720
721 bool parseDirectiveEven(SMLoc L);
722 bool ParseDirectiveWord(unsigned Size, SMLoc L);
723 bool ParseDirectiveCode(StringRef IDVal, SMLoc L);
724
725 bool processInstruction(MCInst &Inst, const OperandVector &Ops);
726
727 /// Wrapper around MCStreamer::EmitInstruction(). Possibly adds
728 /// instrumentation around Inst.
729 void EmitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
730
731 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
732 OperandVector &Operands, MCStreamer &Out,
733 uint64_t &ErrorInfo,
734 bool MatchingInlineAsm) override;
735
736 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
737 MCStreamer &Out, bool MatchingInlineAsm);
738
739 bool ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
740 bool MatchingInlineAsm);
741
742 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
743 OperandVector &Operands, MCStreamer &Out,
744 uint64_t &ErrorInfo,
745 bool MatchingInlineAsm);
746
747 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
748 OperandVector &Operands, MCStreamer &Out,
749 uint64_t &ErrorInfo,
750 bool MatchingInlineAsm);
751
752 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
753
754 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
755 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
756 /// \return \c true if no parsing errors occurred, \c false otherwise.
757 bool HandleAVX512Operand(OperandVector &Operands,
758 const MCParsedAsmOperand &Op);
759
is64BitMode() const760 bool is64BitMode() const {
761 // FIXME: Can tablegen auto-generate this?
762 return getSTI().getFeatureBits()[X86::Mode64Bit];
763 }
is32BitMode() const764 bool is32BitMode() const {
765 // FIXME: Can tablegen auto-generate this?
766 return getSTI().getFeatureBits()[X86::Mode32Bit];
767 }
is16BitMode() const768 bool is16BitMode() const {
769 // FIXME: Can tablegen auto-generate this?
770 return getSTI().getFeatureBits()[X86::Mode16Bit];
771 }
SwitchMode(unsigned mode)772 void SwitchMode(unsigned mode) {
773 MCSubtargetInfo &STI = copySTI();
774 FeatureBitset AllModes({X86::Mode64Bit, X86::Mode32Bit, X86::Mode16Bit});
775 FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
776 unsigned FB = ComputeAvailableFeatures(
777 STI.ToggleFeature(OldMode.flip(mode)));
778 setAvailableFeatures(FB);
779
780 assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
781 }
782
getPointerWidth()783 unsigned getPointerWidth() {
784 if (is16BitMode()) return 16;
785 if (is32BitMode()) return 32;
786 if (is64BitMode()) return 64;
787 llvm_unreachable("invalid mode");
788 }
789
isParsingIntelSyntax()790 bool isParsingIntelSyntax() {
791 return getParser().getAssemblerDialect();
792 }
793
794 /// @name Auto-generated Matcher Functions
795 /// {
796
797 #define GET_ASSEMBLER_HEADER
798 #include "X86GenAsmMatcher.inc"
799
800 /// }
801
802 public:
X86AsmParser(const MCSubtargetInfo & sti,MCAsmParser & Parser,const MCInstrInfo & mii,const MCTargetOptions & Options)803 X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
804 const MCInstrInfo &mii, const MCTargetOptions &Options)
805 : MCTargetAsmParser(Options, sti), MII(mii), InstInfo(nullptr) {
806
807 // Initialize the set of available features.
808 setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
809 Instrumentation.reset(
810 CreateX86AsmInstrumentation(Options, Parser.getContext(), STI));
811 }
812
813 bool ParseRegister(unsigned &RegNo, SMLoc &StartLoc, SMLoc &EndLoc) override;
814
815 void SetFrameRegister(unsigned RegNo) override;
816
817 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
818 SMLoc NameLoc, OperandVector &Operands) override;
819
820 bool ParseDirective(AsmToken DirectiveID) override;
821 };
822 } // end anonymous namespace
823
824 /// @name Auto-generated Match Functions
825 /// {
826
827 static unsigned MatchRegisterName(StringRef Name);
828
829 /// }
830
CheckBaseRegAndIndexReg(unsigned BaseReg,unsigned IndexReg,StringRef & ErrMsg)831 static bool CheckBaseRegAndIndexReg(unsigned BaseReg, unsigned IndexReg,
832 StringRef &ErrMsg) {
833 // If we have both a base register and an index register make sure they are
834 // both 64-bit or 32-bit registers.
835 // To support VSIB, IndexReg can be 128-bit or 256-bit registers.
836 if (BaseReg != 0 && IndexReg != 0) {
837 if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
838 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
839 X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg)) &&
840 IndexReg != X86::RIZ) {
841 ErrMsg = "base register is 64-bit, but index register is not";
842 return true;
843 }
844 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
845 (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
846 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) &&
847 IndexReg != X86::EIZ){
848 ErrMsg = "base register is 32-bit, but index register is not";
849 return true;
850 }
851 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
852 if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
853 X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
854 ErrMsg = "base register is 16-bit, but index register is not";
855 return true;
856 }
857 if (((BaseReg == X86::BX || BaseReg == X86::BP) &&
858 IndexReg != X86::SI && IndexReg != X86::DI) ||
859 ((BaseReg == X86::SI || BaseReg == X86::DI) &&
860 IndexReg != X86::BX && IndexReg != X86::BP)) {
861 ErrMsg = "invalid 16-bit base/index register combination";
862 return true;
863 }
864 }
865 }
866 return false;
867 }
868
ParseRegister(unsigned & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)869 bool X86AsmParser::ParseRegister(unsigned &RegNo,
870 SMLoc &StartLoc, SMLoc &EndLoc) {
871 MCAsmParser &Parser = getParser();
872 RegNo = 0;
873 const AsmToken &PercentTok = Parser.getTok();
874 StartLoc = PercentTok.getLoc();
875
876 // If we encounter a %, ignore it. This code handles registers with and
877 // without the prefix, unprefixed registers can occur in cfi directives.
878 if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent))
879 Parser.Lex(); // Eat percent token.
880
881 const AsmToken &Tok = Parser.getTok();
882 EndLoc = Tok.getEndLoc();
883
884 if (Tok.isNot(AsmToken::Identifier)) {
885 if (isParsingIntelSyntax()) return true;
886 return Error(StartLoc, "invalid register name",
887 SMRange(StartLoc, EndLoc));
888 }
889
890 RegNo = MatchRegisterName(Tok.getString());
891
892 // If the match failed, try the register name as lowercase.
893 if (RegNo == 0)
894 RegNo = MatchRegisterName(Tok.getString().lower());
895
896 // The "flags" register cannot be referenced directly.
897 // Treat it as an identifier instead.
898 if (isParsingInlineAsm() && isParsingIntelSyntax() && RegNo == X86::EFLAGS)
899 RegNo = 0;
900
901 if (!is64BitMode()) {
902 // FIXME: This should be done using Requires<Not64BitMode> and
903 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
904 // checked.
905 // FIXME: Check AH, CH, DH, BH cannot be used in an instruction requiring a
906 // REX prefix.
907 if (RegNo == X86::RIZ ||
908 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
909 X86II::isX86_64NonExtLowByteReg(RegNo) ||
910 X86II::isX86_64ExtendedReg(RegNo) ||
911 X86II::is32ExtendedReg(RegNo))
912 return Error(StartLoc, "register %"
913 + Tok.getString() + " is only available in 64-bit mode",
914 SMRange(StartLoc, EndLoc));
915 } else if (!getSTI().getFeatureBits()[X86::FeatureAVX512]) {
916 if (X86II::is32ExtendedReg(RegNo))
917 return Error(StartLoc, "register %"
918 + Tok.getString() + " is only available with AVX512",
919 SMRange(StartLoc, EndLoc));
920 }
921
922 // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
923 if (RegNo == 0 && (Tok.getString() == "st" || Tok.getString() == "ST")) {
924 RegNo = X86::ST0;
925 Parser.Lex(); // Eat 'st'
926
927 // Check to see if we have '(4)' after %st.
928 if (getLexer().isNot(AsmToken::LParen))
929 return false;
930 // Lex the paren.
931 getParser().Lex();
932
933 const AsmToken &IntTok = Parser.getTok();
934 if (IntTok.isNot(AsmToken::Integer))
935 return Error(IntTok.getLoc(), "expected stack index");
936 switch (IntTok.getIntVal()) {
937 case 0: RegNo = X86::ST0; break;
938 case 1: RegNo = X86::ST1; break;
939 case 2: RegNo = X86::ST2; break;
940 case 3: RegNo = X86::ST3; break;
941 case 4: RegNo = X86::ST4; break;
942 case 5: RegNo = X86::ST5; break;
943 case 6: RegNo = X86::ST6; break;
944 case 7: RegNo = X86::ST7; break;
945 default: return Error(IntTok.getLoc(), "invalid stack index");
946 }
947
948 if (getParser().Lex().isNot(AsmToken::RParen))
949 return Error(Parser.getTok().getLoc(), "expected ')'");
950
951 EndLoc = Parser.getTok().getEndLoc();
952 Parser.Lex(); // Eat ')'
953 return false;
954 }
955
956 EndLoc = Parser.getTok().getEndLoc();
957
958 // If this is "db[0-7]", match it as an alias
959 // for dr[0-7].
960 if (RegNo == 0 && Tok.getString().size() == 3 &&
961 Tok.getString().startswith("db")) {
962 switch (Tok.getString()[2]) {
963 case '0': RegNo = X86::DR0; break;
964 case '1': RegNo = X86::DR1; break;
965 case '2': RegNo = X86::DR2; break;
966 case '3': RegNo = X86::DR3; break;
967 case '4': RegNo = X86::DR4; break;
968 case '5': RegNo = X86::DR5; break;
969 case '6': RegNo = X86::DR6; break;
970 case '7': RegNo = X86::DR7; break;
971 }
972
973 if (RegNo != 0) {
974 EndLoc = Parser.getTok().getEndLoc();
975 Parser.Lex(); // Eat it.
976 return false;
977 }
978 }
979
980 if (RegNo == 0) {
981 if (isParsingIntelSyntax()) return true;
982 return Error(StartLoc, "invalid register name",
983 SMRange(StartLoc, EndLoc));
984 }
985
986 Parser.Lex(); // Eat identifier token.
987 return false;
988 }
989
SetFrameRegister(unsigned RegNo)990 void X86AsmParser::SetFrameRegister(unsigned RegNo) {
991 Instrumentation->SetInitialFrameRegister(RegNo);
992 }
993
DefaultMemSIOperand(SMLoc Loc)994 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
995 unsigned basereg =
996 is64BitMode() ? X86::RSI : (is32BitMode() ? X86::ESI : X86::SI);
997 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
998 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
999 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
1000 Loc, Loc, 0);
1001 }
1002
DefaultMemDIOperand(SMLoc Loc)1003 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1004 unsigned basereg =
1005 is64BitMode() ? X86::RDI : (is32BitMode() ? X86::EDI : X86::DI);
1006 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1007 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1008 /*BaseReg=*/basereg, /*IndexReg=*/0, /*Scale=*/1,
1009 Loc, Loc, 0);
1010 }
1011
IsSIReg(unsigned Reg)1012 bool X86AsmParser::IsSIReg(unsigned Reg) {
1013 switch (Reg) {
1014 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1015 case X86::RSI:
1016 case X86::ESI:
1017 case X86::SI:
1018 return true;
1019 case X86::RDI:
1020 case X86::EDI:
1021 case X86::DI:
1022 return false;
1023 }
1024 }
1025
GetSIDIForRegClass(unsigned RegClassID,unsigned Reg,bool IsSIReg)1026 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1027 bool IsSIReg) {
1028 switch (RegClassID) {
1029 default: llvm_unreachable("Unexpected register class");
1030 case X86::GR64RegClassID:
1031 return IsSIReg ? X86::RSI : X86::RDI;
1032 case X86::GR32RegClassID:
1033 return IsSIReg ? X86::ESI : X86::EDI;
1034 case X86::GR16RegClassID:
1035 return IsSIReg ? X86::SI : X86::DI;
1036 }
1037 }
1038
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1039 void X86AsmParser::AddDefaultSrcDestOperands(
1040 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1041 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1042 if (isParsingIntelSyntax()) {
1043 Operands.push_back(std::move(Dst));
1044 Operands.push_back(std::move(Src));
1045 }
1046 else {
1047 Operands.push_back(std::move(Src));
1048 Operands.push_back(std::move(Dst));
1049 }
1050 }
1051
/// Reconcile explicitly written operands with the parser's default operands.
///
/// \p OrigOperands holds the instruction name followed by the operands parsed
/// so far; \p FinalOperands holds the default operands. When \p OrigOperands
/// contains more than the name, each default operand is checked against the
/// written operand in the same position:
///   - a default register operand must be matched by that same register;
///   - a default memory operand must be matched by a memory operand whose base
///     register is in GR64, GR32 or GR16, and all such base registers must
///     belong to one register class.
/// For memory operands the default operand takes over the written operand's
/// size and segment register, and its base register becomes the SI/DI register
/// of the written operand's register class. Afterwards the written operands
/// are popped and the (adjusted) default operands are appended instead.
///
/// \returns the result of Error() when the base registers disagree on their
/// register class, and false otherwise. False is also returned -- without
/// moving any operand between the two vectors -- when a written operand does
/// not fit its default operand, leaving the diagnosis to later matching.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    // Warnings are collected here and only emitted once every operand has
    // been accepted (see below).
    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      // i + 1 skips the instruction name stored at OrigOperands[0].
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all
        // register bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        // Determine the register class of the written base register.
        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        // Re-size the default SI/DI base register to the written operand's
        // register class.
        bool IsSI = IsSIReg(FinalReg);
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        // The written base register is not the one that will be used.
        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - this
    // prevents legal cases like "movsd (%rax), %xmm0" from mistakenly
    // producing warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // Move the default operands into OrigOperands one by one.
  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
    OrigOperands.push_back(std::move(FinalOperands[i]));

  return false;
}
1133
ParseOperand()1134 std::unique_ptr<X86Operand> X86AsmParser::ParseOperand() {
1135 if (isParsingIntelSyntax())
1136 return ParseIntelOperand();
1137 return ParseATTOperand();
1138 }
1139
1140 /// getIntelMemOperandSize - Return intel memory operand size.
getIntelMemOperandSize(StringRef OpStr)1141 static unsigned getIntelMemOperandSize(StringRef OpStr) {
1142 unsigned Size = StringSwitch<unsigned>(OpStr)
1143 .Cases("BYTE", "byte", 8)
1144 .Cases("WORD", "word", 16)
1145 .Cases("DWORD", "dword", 32)
1146 .Cases("FWORD", "fword", 48)
1147 .Cases("QWORD", "qword", 64)
1148 .Cases("MMWORD","mmword", 64)
1149 .Cases("XWORD", "xword", 80)
1150 .Cases("TBYTE", "tbyte", 80)
1151 .Cases("XMMWORD", "xmmword", 128)
1152 .Cases("YMMWORD", "ymmword", 256)
1153 .Cases("ZMMWORD", "zmmword", 512)
1154 .Cases("OPAQUE", "opaque", -1U) // needs to be non-zero, but doesn't matter
1155 .Default(0);
1156 return Size;
1157 }
1158
CreateMemForInlineAsm(unsigned SegReg,const MCExpr * Disp,unsigned BaseReg,unsigned IndexReg,unsigned Scale,SMLoc Start,SMLoc End,unsigned Size,StringRef Identifier,InlineAsmIdentifierInfo & Info)1159 std::unique_ptr<X86Operand> X86AsmParser::CreateMemForInlineAsm(
1160 unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
1161 unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
1162 InlineAsmIdentifierInfo &Info) {
1163 // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
1164 // some other label reference.
1165 if (isa<MCSymbolRefExpr>(Disp) && Info.OpDecl && !Info.IsVarDecl) {
1166 // Insert an explicit size if the user didn't have one.
1167 if (!Size) {
1168 Size = getPointerWidth();
1169 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1170 /*Len=*/0, Size);
1171 }
1172
1173 // Create an absolute memory reference in order to match against
1174 // instructions taking a PC relative operand.
1175 return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size,
1176 Identifier, Info.OpDecl);
1177 }
1178
1179 // We either have a direct symbol reference, or an offset from a symbol. The
1180 // parser always puts the symbol on the LHS, so look there for size
1181 // calculation purposes.
1182 const MCBinaryExpr *BinOp = dyn_cast<MCBinaryExpr>(Disp);
1183 bool IsSymRef =
1184 isa<MCSymbolRefExpr>(BinOp ? BinOp->getLHS() : Disp);
1185 if (IsSymRef) {
1186 if (!Size) {
1187 Size = Info.Type * 8; // Size is in terms of bits in this context.
1188 if (Size)
1189 InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
1190 /*Len=*/0, Size);
1191 }
1192 }
1193
1194 // When parsing inline assembly we set the base register to a non-zero value
1195 // if we don't know the actual value at this time. This is necessary to
1196 // get the matching correct in some cases.
1197 BaseReg = BaseReg ? BaseReg : 1;
1198 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
1199 IndexReg, Scale, Start, End, Size, Identifier,
1200 Info.OpDecl);
1201 }
1202
/// Record the inline-asm rewrites for a bracketed Intel memory expression that
/// contains a symbol, so that only the symbol (plus one folded immediate
/// displacement) survives in the rewritten string.
///
/// \param AsmRewrites   rewrite list that is appended to and patched in place.
/// \param SymName       text of the symbol; its data() pointer is compared
///                      against the locations below, so it is presumably
///                      expected to point into the same source buffer.
/// \param ImmDisp       displacement parsed before the '[' (0 if none).
/// \param FinalImmDisp  immediate displacement computed for the whole operand.
/// \param BracLoc       location of the '['.
/// \param StartInBrac   location of the first token after the '['.
/// \param End           location where a one-character skip is recorded for
///                      the closing ']'.
static void
RewriteIntelBracExpression(SmallVectorImpl<AsmRewrite> &AsmRewrites,
                           StringRef SymName, int64_t ImmDisp,
                           int64_t FinalImmDisp, SMLoc &BracLoc,
                           SMLoc &StartInBrac, SMLoc &End) {
  // Remove the '[' and ']' from the IR string.
  AsmRewrites.emplace_back(AOK_Skip, BracLoc, 1);
  AsmRewrites.emplace_back(AOK_Skip, End, 1);

  // If ImmDisp is non-zero, then we parsed a displacement before the
  // bracketed expression (i.e., ImmDisp [ BaseReg + Scale*IndexReg + Disp])
  // If ImmDisp doesn't match the displacement computed by the state machine
  // then we have an additional displacement in the bracketed expression.
  if (ImmDisp != FinalImmDisp) {
    if (ImmDisp) {
      // We have an immediate displacement before the bracketed expression.
      // Adjust this to match the final immediate displacement.
      bool Found = false;
      for (AsmRewrite &AR : AsmRewrites) {
        // Only rewrites located at or before the '[' are candidates.
        if (AR.Loc.getPointer() > BracLoc.getPointer())
          continue;
        if (AR.Kind == AOK_ImmPrefix || AR.Kind == AOK_Imm) {
          // NOTE(review): the loop breaks right after setting Found, so this
          // assertion cannot fire as written.
          assert (!Found && "ImmDisp already rewritten.");
          // Turn the rewrite into an immediate that covers everything from
          // its location up to the '[' and carries the final displacement.
          AR.Kind = AOK_Imm;
          AR.Len = BracLoc.getPointer() - AR.Loc.getPointer();
          AR.Val = FinalImmDisp;
          Found = true;
          break;
        }
      }
      assert (Found && "Unable to rewrite ImmDisp.");
      (void)Found; // Found is otherwise unused when asserts are compiled out.
    } else {
      // We have a symbolic and an immediate displacement, but no displacement
      // before the bracketed expression.  Put the immediate displacement
      // before the bracketed expression.
      AsmRewrites.emplace_back(AOK_Imm, BracLoc, 0, FinalImmDisp);
    }
  }
  // Remove all the ImmPrefix rewrites within the brackets.
  for (AsmRewrite &AR : AsmRewrites) {
    if (AR.Loc.getPointer() < StartInBrac.getPointer())
      continue;
    if (AR.Kind == AOK_ImmPrefix)
      AR.Kind = AOK_Delete;
  }
  const char *SymLocPtr = SymName.data();
  // Skip everything before the symbol.
  // NOTE(review): Len is unsigned and non-zero inside the 'if', so the
  // assertion below can never fail; a symbol located before StartInBrac would
  // show up as a very large Len instead.
  if (unsigned Len = SymLocPtr - StartInBrac.getPointer()) {
    assert(Len > 0 && "Expected a non-negative length.");
    AsmRewrites.emplace_back(AOK_Skip, StartInBrac, Len);
  }
  // Skip everything after the symbol.
  // NOTE(review): same remark as above about the assertion on Len.
  if (unsigned Len = End.getPointer() - (SymLocPtr + SymName.size())) {
    SMLoc Loc = SMLoc::getFromPointer(SymLocPtr + SymName.size());
    assert(Len > 0 && "Expected a non-negative length.");
    AsmRewrites.emplace_back(AOK_Skip, Loc, Len);
  }
}
1262
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1263 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1264 MCAsmParser &Parser = getParser();
1265 const AsmToken &Tok = Parser.getTok();
1266
1267 AsmToken::TokenKind PrevTK = AsmToken::Error;
1268 bool Done = false;
1269 while (!Done) {
1270 bool UpdateLocLex = true;
1271
1272 // The period in the dot operator (e.g., [ebx].foo.bar) is parsed as an
1273 // identifier. Don't try an parse it as a register.
1274 if (Tok.getString().startswith("."))
1275 break;
1276
1277 // If we're parsing an immediate expression, we don't expect a '['.
1278 if (SM.getStopOnLBrac() && getLexer().getKind() == AsmToken::LBrac)
1279 break;
1280
1281 AsmToken::TokenKind TK = getLexer().getKind();
1282 switch (TK) {
1283 default: {
1284 if (SM.isValidEndState()) {
1285 Done = true;
1286 break;
1287 }
1288 return Error(Tok.getLoc(), "unknown token in expression");
1289 }
1290 case AsmToken::EndOfStatement: {
1291 Done = true;
1292 break;
1293 }
1294 case AsmToken::String:
1295 case AsmToken::Identifier: {
1296 // This could be a register or a symbolic displacement.
1297 unsigned TmpReg;
1298 const MCExpr *Val;
1299 SMLoc IdentLoc = Tok.getLoc();
1300 StringRef Identifier = Tok.getString();
1301 if (TK != AsmToken::String && !ParseRegister(TmpReg, IdentLoc, End)) {
1302 SM.onRegister(TmpReg);
1303 UpdateLocLex = false;
1304 break;
1305 } else {
1306 if (!isParsingInlineAsm()) {
1307 if (getParser().parsePrimaryExpr(Val, End))
1308 return Error(Tok.getLoc(), "Unexpected identifier!");
1309 } else {
1310 // This is a dot operator, not an adjacent identifier.
1311 if (Identifier.find('.') != StringRef::npos &&
1312 PrevTK == AsmToken::RBrac) {
1313 return false;
1314 } else {
1315 InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
1316 if (ParseIntelIdentifier(Val, Identifier, Info,
1317 /*Unevaluated=*/false, End))
1318 return true;
1319 }
1320 }
1321 SM.onIdentifierExpr(Val, Identifier);
1322 UpdateLocLex = false;
1323 break;
1324 }
1325 return Error(Tok.getLoc(), "Unexpected identifier!");
1326 }
1327 case AsmToken::Integer: {
1328 StringRef ErrMsg;
1329 if (isParsingInlineAsm() && SM.getAddImmPrefix())
1330 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Tok.getLoc());
1331 // Look for 'b' or 'f' following an Integer as a directional label
1332 SMLoc Loc = getTok().getLoc();
1333 int64_t IntVal = getTok().getIntVal();
1334 End = consumeToken();
1335 UpdateLocLex = false;
1336 if (getLexer().getKind() == AsmToken::Identifier) {
1337 StringRef IDVal = getTok().getString();
1338 if (IDVal == "f" || IDVal == "b") {
1339 MCSymbol *Sym =
1340 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
1341 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
1342 const MCExpr *Val =
1343 MCSymbolRefExpr::create(Sym, Variant, getContext());
1344 if (IDVal == "b" && Sym->isUndefined())
1345 return Error(Loc, "invalid reference to undefined symbol");
1346 StringRef Identifier = Sym->getName();
1347 SM.onIdentifierExpr(Val, Identifier);
1348 End = consumeToken();
1349 } else {
1350 if (SM.onInteger(IntVal, ErrMsg))
1351 return Error(Loc, ErrMsg);
1352 }
1353 } else {
1354 if (SM.onInteger(IntVal, ErrMsg))
1355 return Error(Loc, ErrMsg);
1356 }
1357 break;
1358 }
1359 case AsmToken::Plus: SM.onPlus(); break;
1360 case AsmToken::Minus: SM.onMinus(); break;
1361 case AsmToken::Tilde: SM.onNot(); break;
1362 case AsmToken::Star: SM.onStar(); break;
1363 case AsmToken::Slash: SM.onDivide(); break;
1364 case AsmToken::Pipe: SM.onOr(); break;
1365 case AsmToken::Caret: SM.onXor(); break;
1366 case AsmToken::Amp: SM.onAnd(); break;
1367 case AsmToken::LessLess:
1368 SM.onLShift(); break;
1369 case AsmToken::GreaterGreater:
1370 SM.onRShift(); break;
1371 case AsmToken::LBrac: SM.onLBrac(); break;
1372 case AsmToken::RBrac: SM.onRBrac(); break;
1373 case AsmToken::LParen: SM.onLParen(); break;
1374 case AsmToken::RParen: SM.onRParen(); break;
1375 }
1376 if (SM.hadError())
1377 return Error(Tok.getLoc(), "unknown token in expression");
1378
1379 if (!Done && UpdateLocLex)
1380 End = consumeToken();
1381
1382 PrevTK = TK;
1383 }
1384 return false;
1385 }
1386
/// Parse an Intel-syntax bracketed memory expression starting at '['.
///
/// \param SegReg   segment register of an enclosing override, or 0.
/// \param Start    start location recorded in the resulting operand.
/// \param ImmDisp  displacement already parsed before the '[' (0 if none);
///                 it seeds the expression state machine.
/// \param Size     operand size forwarded to the created operand.
///
/// After the bracketed expression, an optional struct field access (a token
/// containing '.') is folded into the displacement.
///
/// \returns the memory operand, the result of ErrorOperand() when the current
/// token is not '[', or nullptr when parsing the expression, the dot operator
/// or the base/index register check fails.
std::unique_ptr<X86Operand>
X86AsmParser::ParseIntelBracExpression(unsigned SegReg, SMLoc Start,
                                       int64_t ImmDisp, unsigned Size) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc BracLoc = Tok.getLoc(), End = Tok.getEndLoc();
  if (getLexer().isNot(AsmToken::LBrac))
    return ErrorOperand(BracLoc, "Expected '[' token!");
  Parser.Lex(); // Eat '['

  SMLoc StartInBrac = Parser.getTok().getLoc();
  // Parse [ Symbol + ImmDisp ] and [ BaseReg + Scale*IndexReg + ImmDisp ].  We
  // may have already parsed an immediate displacement before the bracketed
  // expression.
  IntelExprStateMachine SM(ImmDisp, /*StopOnLBrac=*/false, /*AddImmPrefix=*/true);
  if (ParseIntelExpression(SM, End))
    return nullptr;

  const MCExpr *Disp = nullptr;
  if (const MCExpr *Sym = SM.getSym()) {
    // A symbolic displacement.
    Disp = Sym;
    // For inline asm, record the rewrites that reduce the bracketed text to
    // the symbol plus its immediate displacement.
    if (isParsingInlineAsm())
      RewriteIntelBracExpression(*InstInfo->AsmRewrites, SM.getSymName(),
                                 ImmDisp, SM.getImm(), BracLoc, StartInBrac,
                                 End);
  }

  // Add the immediate part when it is non-zero, or use it on its own when
  // there is no symbol (so Disp is never left null).
  if (SM.getImm() || !Disp) {
    const MCExpr *Imm = MCConstantExpr::create(SM.getImm(), getContext());
    if (Disp)
      Disp = MCBinaryExpr::createAdd(Disp, Imm, getContext());
    else
      Disp = Imm;  // An immediate displacement only.
  }

  // Parse struct field access.  Intel requires a dot, but MSVC doesn't.  MSVC
  // will in fact do global lookup the field name inside all global typedefs,
  // but we don't emulate that.
  if ((Parser.getTok().getKind() == AsmToken::Identifier ||
       Parser.getTok().getKind() == AsmToken::Dot ||
       Parser.getTok().getKind() == AsmToken::Real) &&
      Parser.getTok().getString().find('.') != StringRef::npos) {
    const MCExpr *NewDisp;
    if (ParseIntelDotOperator(Disp, NewDisp))
      return nullptr;

    End = Tok.getEndLoc();
    Parser.Lex();  // Eat the field.
    Disp = NewDisp;
  }

  int BaseReg = SM.getBaseReg();
  int IndexReg = SM.getIndexReg();
  int Scale = SM.getScale();
  if (!isParsingInlineAsm()) {
    // handle [-42]
    if (!BaseReg && !IndexReg) {
      // Without a segment register the displacement-only form is used.
      if (!SegReg)
        return X86Operand::CreateMem(getPointerWidth(), Disp, Start, End, Size);
      return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
                                   Start, End, Size);
    }
    StringRef ErrMsg;
    if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
      Error(StartInBrac, ErrMsg);
      return nullptr;
    }
    return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
                                 IndexReg, Scale, Start, End, Size);
  }

  // Inline asm: let CreateMemForInlineAsm apply the identifier information
  // gathered by the state machine.
  InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
  return CreateMemForInlineAsm(SegReg, Disp, BaseReg, IndexReg, Scale, Start,
                               End, Size, SM.getSymName(), Info);
}
1463
/// Resolve an identifier in MS-style inline assembly through the frontend.
///
/// Inline assembly may use variable names with namespace alias qualifiers, so
/// the frontend is given the text starting at the identifier and reports how
/// much of it belongs to the identifier.
///
/// \param Val         set to a symbol reference for the resolved identifier.
/// \param Identifier  in: text starting at the identifier; out: the text the
///                    frontend claimed.
/// \param Info        filled in by the frontend lookup.
/// \param IsUnevaluatedOperand  forwarded to the frontend lookup.
/// \param End         set to the end location of the last token consumed.
///
/// \returns false on every path visible here; a failed lookup is handled by
/// treating the identifier as a label.
bool X86AsmParser::ParseIntelIdentifier(const MCExpr *&Val,
                                        StringRef &Identifier,
                                        InlineAsmIdentifierInfo &Info,
                                        bool IsUnevaluatedOperand, SMLoc &End) {
  MCAsmParser &Parser = getParser();
  assert(isParsingInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  // Built from a bare pointer, so LineBuf covers the text from the identifier
  // onwards rather than just the identifier token. The lookup is expected to
  // trim it to the part it recognised (see the use of LineBuf.size() below).
  StringRef LineBuf(Identifier.data());
  void *Result =
    SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr || !Result) &&
         "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (!Result) {
    StringRef InternalName =
      SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                         Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name.
    InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                        InternalName);
  }

  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}
1512
1513 /// \brief Parse intel style segment override.
1514 std::unique_ptr<X86Operand>
ParseIntelSegmentOverride(unsigned SegReg,SMLoc Start,unsigned Size)1515 X86AsmParser::ParseIntelSegmentOverride(unsigned SegReg, SMLoc Start,
1516 unsigned Size) {
1517 MCAsmParser &Parser = getParser();
1518 assert(SegReg != 0 && "Tried to parse a segment override without a segment!");
1519 const AsmToken &Tok = Parser.getTok(); // Eat colon.
1520 if (Tok.isNot(AsmToken::Colon))
1521 return ErrorOperand(Tok.getLoc(), "Expected ':' token!");
1522 Parser.Lex(); // Eat ':'
1523
1524 int64_t ImmDisp = 0;
1525 if (getLexer().is(AsmToken::Integer)) {
1526 ImmDisp = Tok.getIntVal();
1527 AsmToken ImmDispToken = Parser.Lex(); // Eat the integer.
1528
1529 if (isParsingInlineAsm())
1530 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, ImmDispToken.getLoc());
1531
1532 if (getLexer().isNot(AsmToken::LBrac)) {
1533 // An immediate following a 'segment register', 'colon' token sequence can
1534 // be followed by a bracketed expression. If it isn't we know we have our
1535 // final segment override.
1536 const MCExpr *Disp = MCConstantExpr::create(ImmDisp, getContext());
1537 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
1538 /*BaseReg=*/0, /*IndexReg=*/0, /*Scale=*/1,
1539 Start, ImmDispToken.getEndLoc(), Size);
1540 }
1541 }
1542
1543 if (getLexer().is(AsmToken::LBrac))
1544 return ParseIntelBracExpression(SegReg, Start, ImmDisp, Size);
1545
1546 const MCExpr *Val;
1547 SMLoc End;
1548 if (!isParsingInlineAsm()) {
1549 if (getParser().parsePrimaryExpr(Val, End))
1550 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1551
1552 return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
1553 }
1554
1555 InlineAsmIdentifierInfo Info;
1556 StringRef Identifier = Tok.getString();
1557 if (ParseIntelIdentifier(Val, Identifier, Info,
1558 /*Unevaluated=*/false, End))
1559 return nullptr;
1560 return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0,/*IndexReg=*/0,
1561 /*Scale=*/1, Start, End, Size, Identifier, Info);
1562 }
1563
1564 //ParseRoundingModeOp - Parse AVX-512 rounding mode operand
1565 std::unique_ptr<X86Operand>
ParseRoundingModeOp(SMLoc Start,SMLoc End)1566 X86AsmParser::ParseRoundingModeOp(SMLoc Start, SMLoc End) {
1567 MCAsmParser &Parser = getParser();
1568 const AsmToken &Tok = Parser.getTok();
1569 // Eat "{" and mark the current place.
1570 const SMLoc consumedToken = consumeToken();
1571 if (Tok.getIdentifier().startswith("r")){
1572 int rndMode = StringSwitch<int>(Tok.getIdentifier())
1573 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
1574 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
1575 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
1576 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
1577 .Default(-1);
1578 if (-1 == rndMode)
1579 return ErrorOperand(Tok.getLoc(), "Invalid rounding mode.");
1580 Parser.Lex(); // Eat "r*" of r*-sae
1581 if (!getLexer().is(AsmToken::Minus))
1582 return ErrorOperand(Tok.getLoc(), "Expected - at this point");
1583 Parser.Lex(); // Eat "-"
1584 Parser.Lex(); // Eat the sae
1585 if (!getLexer().is(AsmToken::RCurly))
1586 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1587 Parser.Lex(); // Eat "}"
1588 const MCExpr *RndModeOp =
1589 MCConstantExpr::create(rndMode, Parser.getContext());
1590 return X86Operand::CreateImm(RndModeOp, Start, End);
1591 }
1592 if(Tok.getIdentifier().equals("sae")){
1593 Parser.Lex(); // Eat the sae
1594 if (!getLexer().is(AsmToken::RCurly))
1595 return ErrorOperand(Tok.getLoc(), "Expected } at this point");
1596 Parser.Lex(); // Eat "}"
1597 return X86Operand::CreateToken("{sae}", consumedToken);
1598 }
1599 return ErrorOperand(Tok.getLoc(), "unknown token in expression");
1600 }
/// ParseIntelMemOperand - Parse intel style memory operand.
///
/// \param ImmDisp  displacement already parsed before a '['; it must be 0
///                 unless the current token is '['.
/// \param Start    start location recorded in the resulting operand.
/// \param Size     operand size forwarded to the created operand.
///
/// Handles a bracketed expression, a plain expression (outside inline asm),
/// and, for inline asm, an identifier optionally followed by a bracketed
/// immediate expression.
///
/// \returns the memory operand, the result of ErrorOperand() for an
/// unparsable expression, or nullptr after a failure in a helper or an
/// error reported via Error().
std::unique_ptr<X86Operand> X86AsmParser::ParseIntelMemOperand(int64_t ImmDisp,
                                                               SMLoc Start,
                                                               unsigned Size) {
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc End;

  // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
  if (getLexer().is(AsmToken::LBrac))
    return ParseIntelBracExpression(/*SegReg=*/0, Start, ImmDisp, Size);
  assert(ImmDisp == 0);

  const MCExpr *Val;
  if (!isParsingInlineAsm()) {
    if (getParser().parsePrimaryExpr(Val, End))
      return ErrorOperand(Tok.getLoc(), "unknown token in expression");

    return X86Operand::CreateMem(getPointerWidth(), Val, Start, End, Size);
  }

  // Inline asm: resolve the identifier through the frontend.
  InlineAsmIdentifierInfo Info;
  StringRef Identifier = Tok.getString();
  if (ParseIntelIdentifier(Val, Identifier, Info,
                           /*Unevaluated=*/false, End))
    return nullptr;

  // A bare identifier with no trailing '[' is complete at this point.
  if (!getLexer().is(AsmToken::LBrac))
    return CreateMemForInlineAsm(/*SegReg=*/0, Val, /*BaseReg=*/0, /*IndexReg=*/0,
                                 /*Scale=*/1, Start, End, Size, Identifier, Info);

  Parser.Lex(); // Eat '['

  // Parse Identifier [ ImmDisp ]
  IntelExprStateMachine SM(/*ImmDisp=*/0, /*StopOnLBrac=*/true,
                           /*AddImmPrefix=*/false);
  if (ParseIntelExpression(SM, End))
    return nullptr;

  // Only an immediate is allowed inside the brackets after an identifier.
  if (SM.getSym()) {
    Error(Start, "cannot use more than one symbol in memory operand");
    return nullptr;
  }
  if (SM.getBaseReg()) {
    Error(Start, "cannot use base register with variable reference");
    return nullptr;
  }
  if (SM.getIndexReg()) {
    Error(Start, "cannot use index register with variable reference");
    return nullptr;
  }

  const MCExpr *Disp = MCConstantExpr::create(SM.getImm(), getContext());
  // BaseReg is non-zero to avoid assertions.  In the context of inline asm,
  // we're pointing to a local variable in memory, so the base register is
  // really the frame or stack pointer.
  return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                               /*BaseReg=*/1, /*IndexReg=*/0, /*Scale=*/1,
                               Start, End, Size, Identifier, Info.OpDecl);
}
1661
1662 /// Parse the '.' operator.
ParseIntelDotOperator(const MCExpr * Disp,const MCExpr * & NewDisp)1663 bool X86AsmParser::ParseIntelDotOperator(const MCExpr *Disp,
1664 const MCExpr *&NewDisp) {
1665 MCAsmParser &Parser = getParser();
1666 const AsmToken &Tok = Parser.getTok();
1667 int64_t OrigDispVal, DotDispVal;
1668
1669 // FIXME: Handle non-constant expressions.
1670 if (const MCConstantExpr *OrigDisp = dyn_cast<MCConstantExpr>(Disp))
1671 OrigDispVal = OrigDisp->getValue();
1672 else
1673 return Error(Tok.getLoc(), "Non-constant offsets are not supported!");
1674
1675 // Drop the optional '.'.
1676 StringRef DotDispStr = Tok.getString();
1677 if (DotDispStr.startswith("."))
1678 DotDispStr = DotDispStr.drop_front(1);
1679
1680 // .Imm gets lexed as a real.
1681 if (Tok.is(AsmToken::Real)) {
1682 APInt DotDisp;
1683 DotDispStr.getAsInteger(10, DotDisp);
1684 DotDispVal = DotDisp.getZExtValue();
1685 } else if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1686 unsigned DotDisp;
1687 std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
1688 if (SemaCallback->LookupInlineAsmField(BaseMember.first, BaseMember.second,
1689 DotDisp))
1690 return Error(Tok.getLoc(), "Unable to lookup field reference!");
1691 DotDispVal = DotDisp;
1692 } else
1693 return Error(Tok.getLoc(), "Unexpected token type!");
1694
1695 if (isParsingInlineAsm() && Tok.is(AsmToken::Identifier)) {
1696 SMLoc Loc = SMLoc::getFromPointer(DotDispStr.data());
1697 unsigned Len = DotDispStr.size();
1698 unsigned Val = OrigDispVal + DotDispVal;
1699 InstInfo->AsmRewrites->emplace_back(AOK_DotOperator, Loc, Len, Val);
1700 }
1701
1702 NewDisp = MCConstantExpr::create(OrigDispVal + DotDispVal, getContext());
1703 return false;
1704 }
1705
1706 /// Parse the 'offset' operator. This operator is used to specify the
1707 /// location rather then the content of a variable.
ParseIntelOffsetOfOperator()1708 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOffsetOfOperator() {
1709 MCAsmParser &Parser = getParser();
1710 const AsmToken &Tok = Parser.getTok();
1711 SMLoc OffsetOfLoc = Tok.getLoc();
1712 Parser.Lex(); // Eat offset.
1713
1714 const MCExpr *Val;
1715 InlineAsmIdentifierInfo Info;
1716 SMLoc Start = Tok.getLoc(), End;
1717 StringRef Identifier = Tok.getString();
1718 if (ParseIntelIdentifier(Val, Identifier, Info,
1719 /*Unevaluated=*/false, End))
1720 return nullptr;
1721
1722 // Don't emit the offset operator.
1723 InstInfo->AsmRewrites->emplace_back(AOK_Skip, OffsetOfLoc, 7);
1724
1725 // The offset operator will have an 'r' constraint, thus we need to create
1726 // register operand to ensure proper matching. Just pick a GPR based on
1727 // the size of a pointer.
1728 unsigned RegNo =
1729 is64BitMode() ? X86::RBX : (is32BitMode() ? X86::EBX : X86::BX);
1730 return X86Operand::CreateReg(RegNo, Start, End, /*GetAddress=*/true,
1731 OffsetOfLoc, Identifier, Info.OpDecl);
1732 }
1733
/// Selects which inline-asm operator ParseIntelOperator() evaluates.
enum IntelOperatorKind {
  IOK_LENGTH, // LENGTH operator; evaluates to the identifier's Info.Length.
  IOK_SIZE,   // SIZE operator; evaluates to the identifier's Info.Size.
  IOK_TYPE    // TYPE operator; evaluates to the identifier's Info.Type.
};
1739
1740 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
1741 /// returns the number of elements in an array. It returns the value 1 for
1742 /// non-array variables. The SIZE operator returns the size of a C or C++
1743 /// variable. A variable's size is the product of its LENGTH and TYPE. The
1744 /// TYPE operator returns the size of a C or C++ type or variable. If the
1745 /// variable is an array, TYPE returns the size of a single element.
ParseIntelOperator(unsigned OpKind)1746 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperator(unsigned OpKind) {
1747 MCAsmParser &Parser = getParser();
1748 const AsmToken &Tok = Parser.getTok();
1749 SMLoc TypeLoc = Tok.getLoc();
1750 Parser.Lex(); // Eat operator.
1751
1752 const MCExpr *Val = nullptr;
1753 InlineAsmIdentifierInfo Info;
1754 SMLoc Start = Tok.getLoc(), End;
1755 StringRef Identifier = Tok.getString();
1756 if (ParseIntelIdentifier(Val, Identifier, Info,
1757 /*Unevaluated=*/true, End))
1758 return nullptr;
1759
1760 if (!Info.OpDecl)
1761 return ErrorOperand(Start, "unable to lookup expression");
1762
1763 unsigned CVal = 0;
1764 switch(OpKind) {
1765 default: llvm_unreachable("Unexpected operand kind!");
1766 case IOK_LENGTH: CVal = Info.Length; break;
1767 case IOK_SIZE: CVal = Info.Size; break;
1768 case IOK_TYPE: CVal = Info.Type; break;
1769 }
1770
1771 // Rewrite the type operator and the C or C++ type or variable in terms of an
1772 // immediate. E.g. TYPE foo -> $$4
1773 unsigned Len = End.getPointer() - TypeLoc.getPointer();
1774 InstInfo->AsmRewrites->emplace_back(AOK_Imm, TypeLoc, Len, CVal);
1775
1776 const MCExpr *Imm = MCConstantExpr::create(CVal, getContext());
1777 return X86Operand::CreateImm(Imm, Start, End);
1778 }
1779
ParseIntelOperand()1780 std::unique_ptr<X86Operand> X86AsmParser::ParseIntelOperand() {
1781 MCAsmParser &Parser = getParser();
1782 const AsmToken &Tok = Parser.getTok();
1783 SMLoc Start, End;
1784
1785 // Offset, length, type and size operators.
1786 if (isParsingInlineAsm()) {
1787 StringRef AsmTokStr = Tok.getString();
1788 if (AsmTokStr == "offset" || AsmTokStr == "OFFSET")
1789 return ParseIntelOffsetOfOperator();
1790 if (AsmTokStr == "length" || AsmTokStr == "LENGTH")
1791 return ParseIntelOperator(IOK_LENGTH);
1792 if (AsmTokStr == "size" || AsmTokStr == "SIZE")
1793 return ParseIntelOperator(IOK_SIZE);
1794 if (AsmTokStr == "type" || AsmTokStr == "TYPE")
1795 return ParseIntelOperator(IOK_TYPE);
1796 }
1797
1798 bool PtrInOperand = false;
1799 unsigned Size = getIntelMemOperandSize(Tok.getString());
1800 if (Size) {
1801 Parser.Lex(); // Eat operand size (e.g., byte, word).
1802 if (Tok.getString() != "PTR" && Tok.getString() != "ptr")
1803 return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
1804 Parser.Lex(); // Eat ptr.
1805 PtrInOperand = true;
1806 }
1807 Start = Tok.getLoc();
1808
1809 // Immediate.
1810 if (getLexer().is(AsmToken::Integer) || getLexer().is(AsmToken::Minus) ||
1811 getLexer().is(AsmToken::Tilde) || getLexer().is(AsmToken::LParen)) {
1812 AsmToken StartTok = Tok;
1813 IntelExprStateMachine SM(/*Imm=*/0, /*StopOnLBrac=*/true,
1814 /*AddImmPrefix=*/false);
1815 if (ParseIntelExpression(SM, End))
1816 return nullptr;
1817
1818 int64_t Imm = SM.getImm();
1819 if (isParsingInlineAsm()) {
1820 unsigned Len = Tok.getLoc().getPointer() - Start.getPointer();
1821 if (StartTok.getString().size() == Len)
1822 // Just add a prefix if this wasn't a complex immediate expression.
1823 InstInfo->AsmRewrites->emplace_back(AOK_ImmPrefix, Start);
1824 else
1825 // Otherwise, rewrite the complex expression as a single immediate.
1826 InstInfo->AsmRewrites->emplace_back(AOK_Imm, Start, Len, Imm);
1827 }
1828
1829 if (getLexer().isNot(AsmToken::LBrac)) {
1830 // If a directional label (ie. 1f or 2b) was parsed above from
1831 // ParseIntelExpression() then SM.getSym() was set to a pointer to
1832 // to the MCExpr with the directional local symbol and this is a
1833 // memory operand not an immediate operand.
1834 if (SM.getSym())
1835 return X86Operand::CreateMem(getPointerWidth(), SM.getSym(), Start, End,
1836 Size);
1837
1838 const MCExpr *ImmExpr = MCConstantExpr::create(Imm, getContext());
1839 return X86Operand::CreateImm(ImmExpr, Start, End);
1840 }
1841
1842 // Only positive immediates are valid.
1843 if (Imm < 0)
1844 return ErrorOperand(Start, "expected a positive immediate displacement "
1845 "before bracketed expr.");
1846
1847 // Parse ImmDisp [ BaseReg + Scale*IndexReg + Disp ].
1848 return ParseIntelMemOperand(Imm, Start, Size);
1849 }
1850
1851 // rounding mode token
1852 if (getSTI().getFeatureBits()[X86::FeatureAVX512] &&
1853 getLexer().is(AsmToken::LCurly))
1854 return ParseRoundingModeOp(Start, End);
1855
1856 // Register.
1857 unsigned RegNo = 0;
1858 if (!ParseRegister(RegNo, Start, End)) {
1859 // If this is a segment register followed by a ':', then this is the start
1860 // of a segment override, otherwise this is a normal register reference.
1861 // In case it is a normal register and there is ptr in the operand this
1862 // is an error
1863 if (getLexer().isNot(AsmToken::Colon)){
1864 if (PtrInOperand){
1865 return ErrorOperand(Start, "expected memory operand after "
1866 "'ptr', found register operand instead");
1867 }
1868 return X86Operand::CreateReg(RegNo, Start, End);
1869 }
1870
1871 return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size);
1872 }
1873
1874 // Memory operand.
1875 return ParseIntelMemOperand(/*Disp=*/0, Start, Size);
1876 }
1877
ParseATTOperand()1878 std::unique_ptr<X86Operand> X86AsmParser::ParseATTOperand() {
1879 MCAsmParser &Parser = getParser();
1880 switch (getLexer().getKind()) {
1881 default:
1882 // Parse a memory operand with no segment register.
1883 return ParseMemOperand(0, Parser.getTok().getLoc());
1884 case AsmToken::Percent: {
1885 // Read the register.
1886 unsigned RegNo;
1887 SMLoc Start, End;
1888 if (ParseRegister(RegNo, Start, End)) return nullptr;
1889 if (RegNo == X86::EIZ || RegNo == X86::RIZ) {
1890 Error(Start, "%eiz and %riz can only be used as index registers",
1891 SMRange(Start, End));
1892 return nullptr;
1893 }
1894
1895 // If this is a segment register followed by a ':', then this is the start
1896 // of a memory reference, otherwise this is a normal register reference.
1897 if (getLexer().isNot(AsmToken::Colon))
1898 return X86Operand::CreateReg(RegNo, Start, End);
1899
1900 if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
1901 return ErrorOperand(Start, "invalid segment register");
1902
1903 getParser().Lex(); // Eat the colon.
1904 return ParseMemOperand(RegNo, Start);
1905 }
1906 case AsmToken::Dollar: {
1907 // $42 -> immediate.
1908 SMLoc Start = Parser.getTok().getLoc(), End;
1909 Parser.Lex();
1910 const MCExpr *Val;
1911 if (getParser().parseExpression(Val, End))
1912 return nullptr;
1913 return X86Operand::CreateImm(Val, Start, End);
1914 }
1915 case AsmToken::LCurly:{
1916 SMLoc Start = Parser.getTok().getLoc(), End;
1917 if (getSTI().getFeatureBits()[X86::FeatureAVX512])
1918 return ParseRoundingModeOp(Start, End);
1919 return ErrorOperand(Start, "unknown token in expression");
1920 }
1921 }
1922 }
1923
HandleAVX512Operand(OperandVector & Operands,const MCParsedAsmOperand & Op)1924 bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands,
1925 const MCParsedAsmOperand &Op) {
1926 MCAsmParser &Parser = getParser();
1927 if(getSTI().getFeatureBits()[X86::FeatureAVX512]) {
1928 if (getLexer().is(AsmToken::LCurly)) {
1929 // Eat "{" and mark the current place.
1930 const SMLoc consumedToken = consumeToken();
1931 // Distinguish {1to<NUM>} from {%k<NUM>}.
1932 if(getLexer().is(AsmToken::Integer)) {
1933 // Parse memory broadcasting ({1to<NUM>}).
1934 if (getLexer().getTok().getIntVal() != 1)
1935 return !ErrorAndEatStatement(getLexer().getLoc(),
1936 "Expected 1to<NUM> at this point");
1937 Parser.Lex(); // Eat "1" of 1to8
1938 if (!getLexer().is(AsmToken::Identifier) ||
1939 !getLexer().getTok().getIdentifier().startswith("to"))
1940 return !ErrorAndEatStatement(getLexer().getLoc(),
1941 "Expected 1to<NUM> at this point");
1942 // Recognize only reasonable suffixes.
1943 const char *BroadcastPrimitive =
1944 StringSwitch<const char*>(getLexer().getTok().getIdentifier())
1945 .Case("to2", "{1to2}")
1946 .Case("to4", "{1to4}")
1947 .Case("to8", "{1to8}")
1948 .Case("to16", "{1to16}")
1949 .Default(nullptr);
1950 if (!BroadcastPrimitive)
1951 return !ErrorAndEatStatement(getLexer().getLoc(),
1952 "Invalid memory broadcast primitive.");
1953 Parser.Lex(); // Eat "toN" of 1toN
1954 if (!getLexer().is(AsmToken::RCurly))
1955 return !ErrorAndEatStatement(getLexer().getLoc(),
1956 "Expected } at this point");
1957 Parser.Lex(); // Eat "}"
1958 Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
1959 consumedToken));
1960 // No AVX512 specific primitives can pass
1961 // after memory broadcasting, so return.
1962 return true;
1963 } else {
1964 // Parse mask register {%k1}
1965 Operands.push_back(X86Operand::CreateToken("{", consumedToken));
1966 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
1967 Operands.push_back(std::move(Op));
1968 if (!getLexer().is(AsmToken::RCurly))
1969 return !ErrorAndEatStatement(getLexer().getLoc(),
1970 "Expected } at this point");
1971 Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
1972
1973 // Parse "zeroing non-masked" semantic {z}
1974 if (getLexer().is(AsmToken::LCurly)) {
1975 Operands.push_back(X86Operand::CreateToken("{z}", consumeToken()));
1976 if (!getLexer().is(AsmToken::Identifier) ||
1977 getLexer().getTok().getIdentifier() != "z")
1978 return !ErrorAndEatStatement(getLexer().getLoc(),
1979 "Expected z at this point");
1980 Parser.Lex(); // Eat the z
1981 if (!getLexer().is(AsmToken::RCurly))
1982 return !ErrorAndEatStatement(getLexer().getLoc(),
1983 "Expected } at this point");
1984 Parser.Lex(); // Eat the }
1985 }
1986 }
1987 }
1988 }
1989 }
1990 return true;
1991 }
1992
1993 /// ParseMemOperand: segment: disp(basereg, indexreg, scale). The '%ds:' prefix
1994 /// has already been parsed if present.
ParseMemOperand(unsigned SegReg,SMLoc MemStart)1995 std::unique_ptr<X86Operand> X86AsmParser::ParseMemOperand(unsigned SegReg,
1996 SMLoc MemStart) {
1997
1998 MCAsmParser &Parser = getParser();
1999 // We have to disambiguate a parenthesized expression "(4+5)" from the start
2000 // of a memory operand with a missing displacement "(%ebx)" or "(,%eax)". The
2001 // only way to do this without lookahead is to eat the '(' and see what is
2002 // after it.
2003 const MCExpr *Disp = MCConstantExpr::create(0, getParser().getContext());
2004 if (getLexer().isNot(AsmToken::LParen)) {
2005 SMLoc ExprEnd;
2006 if (getParser().parseExpression(Disp, ExprEnd)) return nullptr;
2007
2008 // After parsing the base expression we could either have a parenthesized
2009 // memory address or not. If not, return now. If so, eat the (.
2010 if (getLexer().isNot(AsmToken::LParen)) {
2011 // Unless we have a segment register, treat this as an immediate.
2012 if (SegReg == 0)
2013 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, ExprEnd);
2014 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2015 MemStart, ExprEnd);
2016 }
2017
2018 // Eat the '('.
2019 Parser.Lex();
2020 } else {
2021 // Okay, we have a '('. We don't know if this is an expression or not, but
2022 // so we have to eat the ( to see beyond it.
2023 SMLoc LParenLoc = Parser.getTok().getLoc();
2024 Parser.Lex(); // Eat the '('.
2025
2026 if (getLexer().is(AsmToken::Percent) || getLexer().is(AsmToken::Comma)) {
2027 // Nothing to do here, fall into the code below with the '(' part of the
2028 // memory operand consumed.
2029 } else {
2030 SMLoc ExprEnd;
2031
2032 // It must be an parenthesized expression, parse it now.
2033 if (getParser().parseParenExpression(Disp, ExprEnd))
2034 return nullptr;
2035
2036 // After parsing the base expression we could either have a parenthesized
2037 // memory address or not. If not, return now. If so, eat the (.
2038 if (getLexer().isNot(AsmToken::LParen)) {
2039 // Unless we have a segment register, treat this as an immediate.
2040 if (SegReg == 0)
2041 return X86Operand::CreateMem(getPointerWidth(), Disp, LParenLoc,
2042 ExprEnd);
2043 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, 0, 0, 1,
2044 MemStart, ExprEnd);
2045 }
2046
2047 // Eat the '('.
2048 Parser.Lex();
2049 }
2050 }
2051
2052 // If we reached here, then we just ate the ( of the memory operand. Process
2053 // the rest of the memory operand.
2054 unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
2055 SMLoc IndexLoc, BaseLoc;
2056
2057 if (getLexer().is(AsmToken::Percent)) {
2058 SMLoc StartLoc, EndLoc;
2059 BaseLoc = Parser.getTok().getLoc();
2060 if (ParseRegister(BaseReg, StartLoc, EndLoc)) return nullptr;
2061 if (BaseReg == X86::EIZ || BaseReg == X86::RIZ) {
2062 Error(StartLoc, "eiz and riz can only be used as index registers",
2063 SMRange(StartLoc, EndLoc));
2064 return nullptr;
2065 }
2066 }
2067
2068 if (getLexer().is(AsmToken::Comma)) {
2069 Parser.Lex(); // Eat the comma.
2070 IndexLoc = Parser.getTok().getLoc();
2071
2072 // Following the comma we should have either an index register, or a scale
2073 // value. We don't support the later form, but we want to parse it
2074 // correctly.
2075 //
2076 // Not that even though it would be completely consistent to support syntax
2077 // like "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
2078 if (getLexer().is(AsmToken::Percent)) {
2079 SMLoc L;
2080 if (ParseRegister(IndexReg, L, L)) return nullptr;
2081
2082 if (getLexer().isNot(AsmToken::RParen)) {
2083 // Parse the scale amount:
2084 // ::= ',' [scale-expression]
2085 if (getLexer().isNot(AsmToken::Comma)) {
2086 Error(Parser.getTok().getLoc(),
2087 "expected comma in scale expression");
2088 return nullptr;
2089 }
2090 Parser.Lex(); // Eat the comma.
2091
2092 if (getLexer().isNot(AsmToken::RParen)) {
2093 SMLoc Loc = Parser.getTok().getLoc();
2094
2095 int64_t ScaleVal;
2096 if (getParser().parseAbsoluteExpression(ScaleVal)){
2097 Error(Loc, "expected scale expression");
2098 return nullptr;
2099 }
2100
2101 // Validate the scale amount.
2102 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2103 ScaleVal != 1) {
2104 Error(Loc, "scale factor in 16-bit address must be 1");
2105 return nullptr;
2106 }
2107 if (ScaleVal != 1 && ScaleVal != 2 && ScaleVal != 4 &&
2108 ScaleVal != 8) {
2109 Error(Loc, "scale factor in address must be 1, 2, 4 or 8");
2110 return nullptr;
2111 }
2112 Scale = (unsigned)ScaleVal;
2113 }
2114 }
2115 } else if (getLexer().isNot(AsmToken::RParen)) {
2116 // A scale amount without an index is ignored.
2117 // index.
2118 SMLoc Loc = Parser.getTok().getLoc();
2119
2120 int64_t Value;
2121 if (getParser().parseAbsoluteExpression(Value))
2122 return nullptr;
2123
2124 if (Value != 1)
2125 Warning(Loc, "scale factor without index register is ignored");
2126 Scale = 1;
2127 }
2128 }
2129
2130 // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
2131 if (getLexer().isNot(AsmToken::RParen)) {
2132 Error(Parser.getTok().getLoc(), "unexpected token in memory operand");
2133 return nullptr;
2134 }
2135 SMLoc MemEnd = Parser.getTok().getEndLoc();
2136 Parser.Lex(); // Eat the ')'.
2137
2138 // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
2139 // and then only in non-64-bit modes. Except for DX, which is a special case
2140 // because an unofficial form of in/out instructions uses it.
2141 if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
2142 (is64BitMode() || (BaseReg != X86::BX && BaseReg != X86::BP &&
2143 BaseReg != X86::SI && BaseReg != X86::DI)) &&
2144 BaseReg != X86::DX) {
2145 Error(BaseLoc, "invalid 16-bit base register");
2146 return nullptr;
2147 }
2148 if (BaseReg == 0 &&
2149 X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
2150 Error(IndexLoc, "16-bit memory operand may not include only index register");
2151 return nullptr;
2152 }
2153
2154 StringRef ErrMsg;
2155 if (CheckBaseRegAndIndexReg(BaseReg, IndexReg, ErrMsg)) {
2156 Error(BaseLoc, ErrMsg);
2157 return nullptr;
2158 }
2159
2160 if (SegReg || BaseReg || IndexReg)
2161 return X86Operand::CreateMem(getPointerWidth(), SegReg, Disp, BaseReg,
2162 IndexReg, Scale, MemStart, MemEnd);
2163 return X86Operand::CreateMem(getPointerWidth(), Disp, MemStart, MemEnd);
2164 }
2165
ParseInstruction(ParseInstructionInfo & Info,StringRef Name,SMLoc NameLoc,OperandVector & Operands)2166 bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
2167 SMLoc NameLoc, OperandVector &Operands) {
2168 MCAsmParser &Parser = getParser();
2169 InstInfo = &Info;
2170 StringRef PatchedName = Name;
2171
2172 // FIXME: Hack to recognize setneb as setne.
2173 if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
2174 PatchedName != "setb" && PatchedName != "setnb")
2175 PatchedName = PatchedName.substr(0, Name.size()-1);
2176
2177 // FIXME: Hack to recognize cmp<comparison code>{ss,sd,ps,pd}.
2178 if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
2179 (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
2180 PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
2181 bool IsVCMP = PatchedName[0] == 'v';
2182 unsigned CCIdx = IsVCMP ? 4 : 3;
2183 unsigned ComparisonCode = StringSwitch<unsigned>(
2184 PatchedName.slice(CCIdx, PatchedName.size() - 2))
2185 .Case("eq", 0x00)
2186 .Case("eq_oq", 0x00)
2187 .Case("lt", 0x01)
2188 .Case("lt_os", 0x01)
2189 .Case("le", 0x02)
2190 .Case("le_os", 0x02)
2191 .Case("unord", 0x03)
2192 .Case("unord_q", 0x03)
2193 .Case("neq", 0x04)
2194 .Case("neq_uq", 0x04)
2195 .Case("nlt", 0x05)
2196 .Case("nlt_us", 0x05)
2197 .Case("nle", 0x06)
2198 .Case("nle_us", 0x06)
2199 .Case("ord", 0x07)
2200 .Case("ord_q", 0x07)
2201 /* AVX only from here */
2202 .Case("eq_uq", 0x08)
2203 .Case("nge", 0x09)
2204 .Case("nge_us", 0x09)
2205 .Case("ngt", 0x0A)
2206 .Case("ngt_us", 0x0A)
2207 .Case("false", 0x0B)
2208 .Case("false_oq", 0x0B)
2209 .Case("neq_oq", 0x0C)
2210 .Case("ge", 0x0D)
2211 .Case("ge_os", 0x0D)
2212 .Case("gt", 0x0E)
2213 .Case("gt_os", 0x0E)
2214 .Case("true", 0x0F)
2215 .Case("true_uq", 0x0F)
2216 .Case("eq_os", 0x10)
2217 .Case("lt_oq", 0x11)
2218 .Case("le_oq", 0x12)
2219 .Case("unord_s", 0x13)
2220 .Case("neq_us", 0x14)
2221 .Case("nlt_uq", 0x15)
2222 .Case("nle_uq", 0x16)
2223 .Case("ord_s", 0x17)
2224 .Case("eq_us", 0x18)
2225 .Case("nge_uq", 0x19)
2226 .Case("ngt_uq", 0x1A)
2227 .Case("false_os", 0x1B)
2228 .Case("neq_os", 0x1C)
2229 .Case("ge_oq", 0x1D)
2230 .Case("gt_oq", 0x1E)
2231 .Case("true_us", 0x1F)
2232 .Default(~0U);
2233 if (ComparisonCode != ~0U && (IsVCMP || ComparisonCode < 8)) {
2234
2235 Operands.push_back(X86Operand::CreateToken(PatchedName.slice(0, CCIdx),
2236 NameLoc));
2237
2238 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2239 getParser().getContext());
2240 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2241
2242 PatchedName = PatchedName.substr(PatchedName.size() - 2);
2243 }
2244 }
2245
2246 // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2247 if (PatchedName.startswith("vpcmp") &&
2248 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2249 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2250 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2251 unsigned ComparisonCode = StringSwitch<unsigned>(
2252 PatchedName.slice(5, PatchedName.size() - CCIdx))
2253 .Case("eq", 0x0) // Only allowed on unsigned. Checked below.
2254 .Case("lt", 0x1)
2255 .Case("le", 0x2)
2256 //.Case("false", 0x3) // Not a documented alias.
2257 .Case("neq", 0x4)
2258 .Case("nlt", 0x5)
2259 .Case("nle", 0x6)
2260 //.Case("true", 0x7) // Not a documented alias.
2261 .Default(~0U);
2262 if (ComparisonCode != ~0U && (ComparisonCode != 0 || CCIdx == 2)) {
2263 Operands.push_back(X86Operand::CreateToken("vpcmp", NameLoc));
2264
2265 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2266 getParser().getContext());
2267 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2268
2269 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2270 }
2271 }
2272
2273 // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
2274 if (PatchedName.startswith("vpcom") &&
2275 (PatchedName.endswith("b") || PatchedName.endswith("w") ||
2276 PatchedName.endswith("d") || PatchedName.endswith("q"))) {
2277 unsigned CCIdx = PatchedName.drop_back().back() == 'u' ? 2 : 1;
2278 unsigned ComparisonCode = StringSwitch<unsigned>(
2279 PatchedName.slice(5, PatchedName.size() - CCIdx))
2280 .Case("lt", 0x0)
2281 .Case("le", 0x1)
2282 .Case("gt", 0x2)
2283 .Case("ge", 0x3)
2284 .Case("eq", 0x4)
2285 .Case("neq", 0x5)
2286 .Case("false", 0x6)
2287 .Case("true", 0x7)
2288 .Default(~0U);
2289 if (ComparisonCode != ~0U) {
2290 Operands.push_back(X86Operand::CreateToken("vpcom", NameLoc));
2291
2292 const MCExpr *ImmOp = MCConstantExpr::create(ComparisonCode,
2293 getParser().getContext());
2294 Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
2295
2296 PatchedName = PatchedName.substr(PatchedName.size() - CCIdx);
2297 }
2298 }
2299
2300 Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));
2301
2302 // Determine whether this is an instruction prefix.
2303 bool isPrefix =
2304 Name == "lock" || Name == "rep" ||
2305 Name == "repe" || Name == "repz" ||
2306 Name == "repne" || Name == "repnz" ||
2307 Name == "rex64" || Name == "data16";
2308
2309 bool CurlyAsEndOfStatement = false;
2310 // This does the actual operand parsing. Don't parse any more if we have a
2311 // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
2312 // just want to parse the "lock" as the first instruction and the "incl" as
2313 // the next one.
2314 if (getLexer().isNot(AsmToken::EndOfStatement) && !isPrefix) {
2315
2316 // Parse '*' modifier.
2317 if (getLexer().is(AsmToken::Star))
2318 Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
2319
2320 // Read the operands.
2321 while(1) {
2322 if (std::unique_ptr<X86Operand> Op = ParseOperand()) {
2323 Operands.push_back(std::move(Op));
2324 if (!HandleAVX512Operand(Operands, *Operands.back()))
2325 return true;
2326 } else {
2327 Parser.eatToEndOfStatement();
2328 return true;
2329 }
2330 // check for comma and eat it
2331 if (getLexer().is(AsmToken::Comma))
2332 Parser.Lex();
2333 else
2334 break;
2335 }
2336
2337 // In MS inline asm curly braces mark the begining/end of a block, therefore
2338 // they should be interepreted as end of statement
2339 CurlyAsEndOfStatement =
2340 isParsingIntelSyntax() && isParsingInlineAsm() &&
2341 (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
2342 if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
2343 return ErrorAndEatStatement(getLexer().getLoc(),
2344 "unexpected token in argument list");
2345 }
2346
2347 // Consume the EndOfStatement or the prefix separator Slash
2348 if (getLexer().is(AsmToken::EndOfStatement) ||
2349 (isPrefix && getLexer().is(AsmToken::Slash)))
2350 Parser.Lex();
2351 else if (CurlyAsEndOfStatement)
2352 // Add an actual EndOfStatement before the curly brace
2353 Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
2354 getLexer().getTok().getLoc(), 0);
2355
2356 // This is for gas compatibility and cannot be done in td.
2357 // Adding "p" for some floating point with no argument.
2358 // For example: fsub --> fsubp
2359 bool IsFp =
2360 Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
2361 if (IsFp && Operands.size() == 1) {
2362 const char *Repl = StringSwitch<const char *>(Name)
2363 .Case("fsub", "fsubp")
2364 .Case("fdiv", "fdivp")
2365 .Case("fsubr", "fsubrp")
2366 .Case("fdivr", "fdivrp");
2367 static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
2368 }
2369
2370 // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
2371 // "outb %al, %dx". Out doesn't take a memory form, but this is a widely
2372 // documented form in various unofficial manuals, so a lot of code uses it.
2373 if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
2374 Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
2375 Operands.size() == 3) {
2376 X86Operand &Op = (X86Operand &)*Operands.back();
2377 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2378 isa<MCConstantExpr>(Op.Mem.Disp) &&
2379 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2380 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2381 SMLoc Loc = Op.getEndLoc();
2382 Operands.back() = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2383 }
2384 }
2385 // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
2386 if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
2387 Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
2388 Operands.size() == 3) {
2389 X86Operand &Op = (X86Operand &)*Operands[1];
2390 if (Op.isMem() && Op.Mem.SegReg == 0 &&
2391 isa<MCConstantExpr>(Op.Mem.Disp) &&
2392 cast<MCConstantExpr>(Op.Mem.Disp)->getValue() == 0 &&
2393 Op.Mem.BaseReg == MatchRegisterName("dx") && Op.Mem.IndexReg == 0) {
2394 SMLoc Loc = Op.getEndLoc();
2395 Operands[1] = X86Operand::CreateReg(Op.Mem.BaseReg, Loc, Loc);
2396 }
2397 }
2398
2399 SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
2400 bool HadVerifyError = false;
2401
2402 // Append default arguments to "ins[bwld]"
2403 if (Name.startswith("ins") &&
2404 (Operands.size() == 1 || Operands.size() == 3) &&
2405 (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
2406 Name == "ins")) {
2407
2408 AddDefaultSrcDestOperands(TmpOperands,
2409 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
2410 DefaultMemDIOperand(NameLoc));
2411 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2412 }
2413
2414 // Append default arguments to "outs[bwld]"
2415 if (Name.startswith("outs") &&
2416 (Operands.size() == 1 || Operands.size() == 3) &&
2417 (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
2418 Name == "outsd" || Name == "outs")) {
2419 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2420 X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
2421 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2422 }
2423
2424 // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
2425 // values of $SIREG according to the mode. It would be nice if this
2426 // could be achieved with InstAlias in the tables.
2427 if (Name.startswith("lods") &&
2428 (Operands.size() == 1 || Operands.size() == 2) &&
2429 (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
2430 Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
2431 TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
2432 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2433 }
2434
2435 // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
2436 // values of $DIREG according to the mode. It would be nice if this
2437 // could be achieved with InstAlias in the tables.
2438 if (Name.startswith("stos") &&
2439 (Operands.size() == 1 || Operands.size() == 2) &&
2440 (Name == "stos" || Name == "stosb" || Name == "stosw" ||
2441 Name == "stosl" || Name == "stosd" || Name == "stosq")) {
2442 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2443 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2444 }
2445
2446 // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
2447 // values of $DIREG according to the mode. It would be nice if this
2448 // could be achieved with InstAlias in the tables.
2449 if (Name.startswith("scas") &&
2450 (Operands.size() == 1 || Operands.size() == 2) &&
2451 (Name == "scas" || Name == "scasb" || Name == "scasw" ||
2452 Name == "scasl" || Name == "scasd" || Name == "scasq")) {
2453 TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
2454 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2455 }
2456
2457 // Add default SI and DI operands to "cmps[bwlq]".
2458 if (Name.startswith("cmps") &&
2459 (Operands.size() == 1 || Operands.size() == 3) &&
2460 (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
2461 Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
2462 AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
2463 DefaultMemSIOperand(NameLoc));
2464 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2465 }
2466
2467 // Add default SI and DI operands to "movs[bwlq]".
2468 if (((Name.startswith("movs") &&
2469 (Name == "movs" || Name == "movsb" || Name == "movsw" ||
2470 Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
2471 (Name.startswith("smov") &&
2472 (Name == "smov" || Name == "smovb" || Name == "smovw" ||
2473 Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
2474 (Operands.size() == 1 || Operands.size() == 3)) {
2475 if (Name == "movsd" && Operands.size() == 1)
2476 Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
2477 AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
2478 DefaultMemDIOperand(NameLoc));
2479 HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
2480 }
2481
2482 // Check if we encountered an error for one the string insturctions
2483 if (HadVerifyError) {
2484 return HadVerifyError;
2485 }
2486
2487 // FIXME: Hack to handle recognize s{hr,ar,hl} $1, <op>. Canonicalize to
2488 // "shift <op>".
2489 if ((Name.startswith("shr") || Name.startswith("sar") ||
2490 Name.startswith("shl") || Name.startswith("sal") ||
2491 Name.startswith("rcl") || Name.startswith("rcr") ||
2492 Name.startswith("rol") || Name.startswith("ror")) &&
2493 Operands.size() == 3) {
2494 if (isParsingIntelSyntax()) {
2495 // Intel syntax
2496 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[2]);
2497 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2498 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2499 Operands.pop_back();
2500 } else {
2501 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2502 if (Op1.isImm() && isa<MCConstantExpr>(Op1.getImm()) &&
2503 cast<MCConstantExpr>(Op1.getImm())->getValue() == 1)
2504 Operands.erase(Operands.begin() + 1);
2505 }
2506 }
2507
2508 // Transforms "int $3" into "int3" as a size optimization. We can't write an
2509 // instalias with an immediate operand yet.
2510 if (Name == "int" && Operands.size() == 2) {
2511 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2512 if (Op1.isImm())
2513 if (auto *CE = dyn_cast<MCConstantExpr>(Op1.getImm()))
2514 if (CE->getValue() == 3) {
2515 Operands.erase(Operands.begin() + 1);
2516 static_cast<X86Operand &>(*Operands[0]).setTokenValue("int3");
2517 }
2518 }
2519
2520 // Transforms "xlat mem8" into "xlatb"
2521 if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
2522 X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
2523 if (Op1.isMem8()) {
2524 Warning(Op1.getStartLoc(), "memory operand is only for determining the "
2525 "size, (R|E)BX will be used for the location");
2526 Operands.pop_back();
2527 static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
2528 }
2529 }
2530
2531 return false;
2532 }
2533
processInstruction(MCInst & Inst,const OperandVector & Ops)2534 bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
2535 return false;
2536 }
2537
2538 static const char *getSubtargetFeatureName(uint64_t Val);
2539
EmitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)2540 void X86AsmParser::EmitInstruction(MCInst &Inst, OperandVector &Operands,
2541 MCStreamer &Out) {
2542 Instrumentation->InstrumentAndEmitInstruction(Inst, Operands, getContext(),
2543 MII, Out);
2544 }
2545
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2546 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
2547 OperandVector &Operands,
2548 MCStreamer &Out, uint64_t &ErrorInfo,
2549 bool MatchingInlineAsm) {
2550 if (isParsingIntelSyntax())
2551 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2552 MatchingInlineAsm);
2553 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
2554 MatchingInlineAsm);
2555 }
2556
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)2557 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
2558 OperandVector &Operands, MCStreamer &Out,
2559 bool MatchingInlineAsm) {
2560 // FIXME: This should be replaced with a real .td file alias mechanism.
2561 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
2562 // call.
2563 const char *Repl = StringSwitch<const char *>(Op.getToken())
2564 .Case("finit", "fninit")
2565 .Case("fsave", "fnsave")
2566 .Case("fstcw", "fnstcw")
2567 .Case("fstcww", "fnstcw")
2568 .Case("fstenv", "fnstenv")
2569 .Case("fstsw", "fnstsw")
2570 .Case("fstsww", "fnstsw")
2571 .Case("fclex", "fnclex")
2572 .Default(nullptr);
2573 if (Repl) {
2574 MCInst Inst;
2575 Inst.setOpcode(X86::WAIT);
2576 Inst.setLoc(IDLoc);
2577 if (!MatchingInlineAsm)
2578 EmitInstruction(Inst, Operands, Out);
2579 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
2580 }
2581 }
2582
ErrorMissingFeature(SMLoc IDLoc,uint64_t ErrorInfo,bool MatchingInlineAsm)2583 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc, uint64_t ErrorInfo,
2584 bool MatchingInlineAsm) {
2585 assert(ErrorInfo && "Unknown missing feature!");
2586 ArrayRef<SMRange> EmptyRanges = None;
2587 SmallString<126> Msg;
2588 raw_svector_ostream OS(Msg);
2589 OS << "instruction requires:";
2590 uint64_t Mask = 1;
2591 for (unsigned i = 0; i < (sizeof(ErrorInfo)*8-1); ++i) {
2592 if (ErrorInfo & Mask)
2593 OS << ' ' << getSubtargetFeatureName(ErrorInfo & Mask);
2594 Mask <<= 1;
2595 }
2596 return Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2597 }
2598
MatchAndEmitATTInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2599 bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
2600 OperandVector &Operands,
2601 MCStreamer &Out,
2602 uint64_t &ErrorInfo,
2603 bool MatchingInlineAsm) {
2604 assert(!Operands.empty() && "Unexpect empty operand list!");
2605 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2606 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2607 ArrayRef<SMRange> EmptyRanges = None;
2608
2609 // First, handle aliases that expand to multiple instructions.
2610 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2611
2612 bool WasOriginallyInvalidOperand = false;
2613 MCInst Inst;
2614
2615 // First, try a direct match.
2616 switch (MatchInstructionImpl(Operands, Inst,
2617 ErrorInfo, MatchingInlineAsm,
2618 isParsingIntelSyntax())) {
2619 default: llvm_unreachable("Unexpected match result!");
2620 case Match_Success:
2621 // Some instructions need post-processing to, for example, tweak which
2622 // encoding is selected. Loop on it while changes happen so the
2623 // individual transformations can chain off each other.
2624 if (!MatchingInlineAsm)
2625 while (processInstruction(Inst, Operands))
2626 ;
2627
2628 Inst.setLoc(IDLoc);
2629 if (!MatchingInlineAsm)
2630 EmitInstruction(Inst, Operands, Out);
2631 Opcode = Inst.getOpcode();
2632 return false;
2633 case Match_MissingFeature:
2634 return ErrorMissingFeature(IDLoc, ErrorInfo, MatchingInlineAsm);
2635 case Match_InvalidOperand:
2636 WasOriginallyInvalidOperand = true;
2637 break;
2638 case Match_MnemonicFail:
2639 break;
2640 }
2641
2642 // FIXME: Ideally, we would only attempt suffix matches for things which are
2643 // valid prefixes, and we could just infer the right unambiguous
2644 // type. However, that requires substantially more matcher support than the
2645 // following hack.
2646
2647 // Change the operand to point to a temporary token.
2648 StringRef Base = Op.getToken();
2649 SmallString<16> Tmp;
2650 Tmp += Base;
2651 Tmp += ' ';
2652 Op.setTokenValue(Tmp);
2653
2654 // If this instruction starts with an 'f', then it is a floating point stack
2655 // instruction. These come in up to three forms for 32-bit, 64-bit, and
2656 // 80-bit floating point, which use the suffixes s,l,t respectively.
2657 //
2658 // Otherwise, we assume that this may be an integer instruction, which comes
2659 // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
2660 const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
2661
2662 // Check for the various suffix matches.
2663 uint64_t ErrorInfoIgnore;
2664 uint64_t ErrorInfoMissingFeature = 0; // Init suppresses compiler warnings.
2665 unsigned Match[4];
2666
2667 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I) {
2668 Tmp.back() = Suffixes[I];
2669 Match[I] = MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2670 MatchingInlineAsm, isParsingIntelSyntax());
2671 // If this returned as a missing feature failure, remember that.
2672 if (Match[I] == Match_MissingFeature)
2673 ErrorInfoMissingFeature = ErrorInfoIgnore;
2674 }
2675
2676 // Restore the old token.
2677 Op.setTokenValue(Base);
2678
2679 // If exactly one matched, then we treat that as a successful match (and the
2680 // instruction will already have been filled in correctly, since the failing
2681 // matches won't have modified it).
2682 unsigned NumSuccessfulMatches =
2683 std::count(std::begin(Match), std::end(Match), Match_Success);
2684 if (NumSuccessfulMatches == 1) {
2685 Inst.setLoc(IDLoc);
2686 if (!MatchingInlineAsm)
2687 EmitInstruction(Inst, Operands, Out);
2688 Opcode = Inst.getOpcode();
2689 return false;
2690 }
2691
2692 // Otherwise, the match failed, try to produce a decent error message.
2693
2694 // If we had multiple suffix matches, then identify this as an ambiguous
2695 // match.
2696 if (NumSuccessfulMatches > 1) {
2697 char MatchChars[4];
2698 unsigned NumMatches = 0;
2699 for (unsigned I = 0, E = array_lengthof(Match); I != E; ++I)
2700 if (Match[I] == Match_Success)
2701 MatchChars[NumMatches++] = Suffixes[I];
2702
2703 SmallString<126> Msg;
2704 raw_svector_ostream OS(Msg);
2705 OS << "ambiguous instructions require an explicit suffix (could be ";
2706 for (unsigned i = 0; i != NumMatches; ++i) {
2707 if (i != 0)
2708 OS << ", ";
2709 if (i + 1 == NumMatches)
2710 OS << "or ";
2711 OS << "'" << Base << MatchChars[i] << "'";
2712 }
2713 OS << ")";
2714 Error(IDLoc, OS.str(), EmptyRanges, MatchingInlineAsm);
2715 return true;
2716 }
2717
2718 // Okay, we know that none of the variants matched successfully.
2719
2720 // If all of the instructions reported an invalid mnemonic, then the original
2721 // mnemonic was invalid.
2722 if (std::count(std::begin(Match), std::end(Match), Match_MnemonicFail) == 4) {
2723 if (!WasOriginallyInvalidOperand) {
2724 ArrayRef<SMRange> Ranges =
2725 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2726 return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
2727 Ranges, MatchingInlineAsm);
2728 }
2729
2730 // Recover location info for the operand if we know which was the problem.
2731 if (ErrorInfo != ~0ULL) {
2732 if (ErrorInfo >= Operands.size())
2733 return Error(IDLoc, "too few operands for instruction",
2734 EmptyRanges, MatchingInlineAsm);
2735
2736 X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
2737 if (Operand.getStartLoc().isValid()) {
2738 SMRange OperandRange = Operand.getLocRange();
2739 return Error(Operand.getStartLoc(), "invalid operand for instruction",
2740 OperandRange, MatchingInlineAsm);
2741 }
2742 }
2743
2744 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2745 MatchingInlineAsm);
2746 }
2747
2748 // If one instruction matched with a missing feature, report this as a
2749 // missing feature.
2750 if (std::count(std::begin(Match), std::end(Match),
2751 Match_MissingFeature) == 1) {
2752 ErrorInfo = ErrorInfoMissingFeature;
2753 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2754 MatchingInlineAsm);
2755 }
2756
2757 // If one instruction matched with an invalid operand, report this as an
2758 // operand failure.
2759 if (std::count(std::begin(Match), std::end(Match),
2760 Match_InvalidOperand) == 1) {
2761 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2762 MatchingInlineAsm);
2763 }
2764
2765 // If all of these were an outright failure, report it in a useless way.
2766 Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
2767 EmptyRanges, MatchingInlineAsm);
2768 return true;
2769 }
2770
MatchAndEmitIntelInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)2771 bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
2772 OperandVector &Operands,
2773 MCStreamer &Out,
2774 uint64_t &ErrorInfo,
2775 bool MatchingInlineAsm) {
2776 assert(!Operands.empty() && "Unexpect empty operand list!");
2777 X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
2778 assert(Op.isToken() && "Leading operand should always be a mnemonic!");
2779 StringRef Mnemonic = Op.getToken();
2780 ArrayRef<SMRange> EmptyRanges = None;
2781
2782 // First, handle aliases that expand to multiple instructions.
2783 MatchFPUWaitAlias(IDLoc, Op, Operands, Out, MatchingInlineAsm);
2784
2785 MCInst Inst;
2786
2787 // Find one unsized memory operand, if present.
2788 X86Operand *UnsizedMemOp = nullptr;
2789 for (const auto &Op : Operands) {
2790 X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
2791 if (X86Op->isMemUnsized())
2792 UnsizedMemOp = X86Op;
2793 }
2794
2795 // Allow some instructions to have implicitly pointer-sized operands. This is
2796 // compatible with gas.
2797 if (UnsizedMemOp) {
2798 static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
2799 for (const char *Instr : PtrSizedInstrs) {
2800 if (Mnemonic == Instr) {
2801 UnsizedMemOp->Mem.Size = getPointerWidth();
2802 break;
2803 }
2804 }
2805 }
2806
2807 // If an unsized memory operand is present, try to match with each memory
2808 // operand size. In Intel assembly, the size is not part of the instruction
2809 // mnemonic.
2810 SmallVector<unsigned, 8> Match;
2811 uint64_t ErrorInfoMissingFeature = 0;
2812 if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
2813 static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
2814 for (unsigned Size : MopSizes) {
2815 UnsizedMemOp->Mem.Size = Size;
2816 uint64_t ErrorInfoIgnore;
2817 unsigned LastOpcode = Inst.getOpcode();
2818 unsigned M =
2819 MatchInstructionImpl(Operands, Inst, ErrorInfoIgnore,
2820 MatchingInlineAsm, isParsingIntelSyntax());
2821 if (Match.empty() || LastOpcode != Inst.getOpcode())
2822 Match.push_back(M);
2823
2824 // If this returned as a missing feature failure, remember that.
2825 if (Match.back() == Match_MissingFeature)
2826 ErrorInfoMissingFeature = ErrorInfoIgnore;
2827 }
2828
2829 // Restore the size of the unsized memory operand if we modified it.
2830 if (UnsizedMemOp)
2831 UnsizedMemOp->Mem.Size = 0;
2832 }
2833
2834 // If we haven't matched anything yet, this is not a basic integer or FPU
2835 // operation. There shouldn't be any ambiguity in our mnemonic table, so try
2836 // matching with the unsized operand.
2837 if (Match.empty()) {
2838 Match.push_back(MatchInstructionImpl(Operands, Inst, ErrorInfo,
2839 MatchingInlineAsm,
2840 isParsingIntelSyntax()));
2841 // If this returned as a missing feature failure, remember that.
2842 if (Match.back() == Match_MissingFeature)
2843 ErrorInfoMissingFeature = ErrorInfo;
2844 }
2845
2846 // Restore the size of the unsized memory operand if we modified it.
2847 if (UnsizedMemOp)
2848 UnsizedMemOp->Mem.Size = 0;
2849
2850 // If it's a bad mnemonic, all results will be the same.
2851 if (Match.back() == Match_MnemonicFail) {
2852 ArrayRef<SMRange> Ranges =
2853 MatchingInlineAsm ? EmptyRanges : Op.getLocRange();
2854 return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
2855 Ranges, MatchingInlineAsm);
2856 }
2857
2858 // If exactly one matched, then we treat that as a successful match (and the
2859 // instruction will already have been filled in correctly, since the failing
2860 // matches won't have modified it).
2861 unsigned NumSuccessfulMatches =
2862 std::count(std::begin(Match), std::end(Match), Match_Success);
2863 if (NumSuccessfulMatches == 1) {
2864 // Some instructions need post-processing to, for example, tweak which
2865 // encoding is selected. Loop on it while changes happen so the individual
2866 // transformations can chain off each other.
2867 if (!MatchingInlineAsm)
2868 while (processInstruction(Inst, Operands))
2869 ;
2870 Inst.setLoc(IDLoc);
2871 if (!MatchingInlineAsm)
2872 EmitInstruction(Inst, Operands, Out);
2873 Opcode = Inst.getOpcode();
2874 return false;
2875 } else if (NumSuccessfulMatches > 1) {
2876 assert(UnsizedMemOp &&
2877 "multiple matches only possible with unsized memory operands");
2878 ArrayRef<SMRange> Ranges =
2879 MatchingInlineAsm ? EmptyRanges : UnsizedMemOp->getLocRange();
2880 return Error(UnsizedMemOp->getStartLoc(),
2881 "ambiguous operand size for instruction '" + Mnemonic + "\'",
2882 Ranges, MatchingInlineAsm);
2883 }
2884
2885 // If one instruction matched with a missing feature, report this as a
2886 // missing feature.
2887 if (std::count(std::begin(Match), std::end(Match),
2888 Match_MissingFeature) == 1) {
2889 ErrorInfo = ErrorInfoMissingFeature;
2890 return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeature,
2891 MatchingInlineAsm);
2892 }
2893
2894 // If one instruction matched with an invalid operand, report this as an
2895 // operand failure.
2896 if (std::count(std::begin(Match), std::end(Match),
2897 Match_InvalidOperand) == 1) {
2898 return Error(IDLoc, "invalid operand for instruction", EmptyRanges,
2899 MatchingInlineAsm);
2900 }
2901
2902 // If all of these were an outright failure, report it in a useless way.
2903 return Error(IDLoc, "unknown instruction mnemonic", EmptyRanges,
2904 MatchingInlineAsm);
2905 }
2906
OmitRegisterFromClobberLists(unsigned RegNo)2907 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
2908 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
2909 }
2910
ParseDirective(AsmToken DirectiveID)2911 bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
2912 MCAsmParser &Parser = getParser();
2913 StringRef IDVal = DirectiveID.getIdentifier();
2914 if (IDVal == ".word")
2915 return ParseDirectiveWord(2, DirectiveID.getLoc());
2916 else if (IDVal.startswith(".code"))
2917 return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
2918 else if (IDVal.startswith(".att_syntax")) {
2919 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2920 if (Parser.getTok().getString() == "prefix")
2921 Parser.Lex();
2922 else if (Parser.getTok().getString() == "noprefix")
2923 return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
2924 "supported: registers must have a "
2925 "'%' prefix in .att_syntax");
2926 }
2927 getParser().setAssemblerDialect(0);
2928 return false;
2929 } else if (IDVal.startswith(".intel_syntax")) {
2930 getParser().setAssemblerDialect(1);
2931 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2932 if (Parser.getTok().getString() == "noprefix")
2933 Parser.Lex();
2934 else if (Parser.getTok().getString() == "prefix")
2935 return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
2936 "supported: registers must not have "
2937 "a '%' prefix in .intel_syntax");
2938 }
2939 return false;
2940 } else if (IDVal == ".even")
2941 return parseDirectiveEven(DirectiveID.getLoc());
2942 return true;
2943 }
2944
2945 /// parseDirectiveEven
2946 /// ::= .even
parseDirectiveEven(SMLoc L)2947 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
2948 const MCSection *Section = getStreamer().getCurrentSection().first;
2949 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2950 TokError("unexpected token in directive");
2951 return false;
2952 }
2953 if (!Section) {
2954 getStreamer().InitSections(false);
2955 Section = getStreamer().getCurrentSection().first;
2956 }
2957 if (Section->UseCodeAlign())
2958 getStreamer().EmitCodeAlignment(2, 0);
2959 else
2960 getStreamer().EmitValueToAlignment(2, 0, 1, 0);
2961 return false;
2962 }
2963 /// ParseDirectiveWord
2964 /// ::= .word [ expression (, expression)* ]
ParseDirectiveWord(unsigned Size,SMLoc L)2965 bool X86AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) {
2966 MCAsmParser &Parser = getParser();
2967 if (getLexer().isNot(AsmToken::EndOfStatement)) {
2968 for (;;) {
2969 const MCExpr *Value;
2970 SMLoc ExprLoc = getLexer().getLoc();
2971 if (getParser().parseExpression(Value))
2972 return false;
2973
2974 if (const auto *MCE = dyn_cast<MCConstantExpr>(Value)) {
2975 assert(Size <= 8 && "Invalid size");
2976 uint64_t IntValue = MCE->getValue();
2977 if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue))
2978 return Error(ExprLoc, "literal value out of range for directive");
2979 getStreamer().EmitIntValue(IntValue, Size);
2980 } else {
2981 getStreamer().EmitValue(Value, Size, ExprLoc);
2982 }
2983
2984 if (getLexer().is(AsmToken::EndOfStatement))
2985 break;
2986
2987 // FIXME: Improve diagnostic.
2988 if (getLexer().isNot(AsmToken::Comma)) {
2989 Error(L, "unexpected token in directive");
2990 return false;
2991 }
2992 Parser.Lex();
2993 }
2994 }
2995
2996 Parser.Lex();
2997 return false;
2998 }
2999
3000 /// ParseDirectiveCode
3001 /// ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)3002 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
3003 MCAsmParser &Parser = getParser();
3004 if (IDVal == ".code16") {
3005 Parser.Lex();
3006 if (!is16BitMode()) {
3007 SwitchMode(X86::Mode16Bit);
3008 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code16);
3009 }
3010 } else if (IDVal == ".code32") {
3011 Parser.Lex();
3012 if (!is32BitMode()) {
3013 SwitchMode(X86::Mode32Bit);
3014 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code32);
3015 }
3016 } else if (IDVal == ".code64") {
3017 Parser.Lex();
3018 if (!is64BitMode()) {
3019 SwitchMode(X86::Mode64Bit);
3020 getParser().getStreamer().EmitAssemblerFlag(MCAF_Code64);
3021 }
3022 } else {
3023 Error(L, "unknown directive " + IDVal);
3024 return false;
3025 }
3026
3027 return false;
3028 }
3029
3030 // Force static initialization.
LLVMInitializeX86AsmParser()3031 extern "C" void LLVMInitializeX86AsmParser() {
3032 RegisterMCAsmParser<X86AsmParser> X(TheX86_32Target);
3033 RegisterMCAsmParser<X86AsmParser> Y(TheX86_64Target);
3034 }
3035
3036 #define GET_REGISTER_MATCHER
3037 #define GET_MATCHER_IMPLEMENTATION
3038 #define GET_SUBTARGET_FEATURE_NAME
3039 #include "X86GenAsmMatcher.inc"
3040