1 //===-- X86AsmParser.cpp - Parse X86 assembly to MCInst instructions ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8
9 #include "MCTargetDesc/X86BaseInfo.h"
10 #include "MCTargetDesc/X86IntelInstPrinter.h"
11 #include "MCTargetDesc/X86MCExpr.h"
12 #include "MCTargetDesc/X86MCTargetDesc.h"
13 #include "MCTargetDesc/X86TargetStreamer.h"
14 #include "TargetInfo/X86TargetInfo.h"
15 #include "X86AsmParserCommon.h"
16 #include "X86Operand.h"
17 #include "llvm/ADT/STLExtras.h"
18 #include "llvm/ADT/SmallString.h"
19 #include "llvm/ADT/SmallVector.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/ADT/Twine.h"
22 #include "llvm/MC/MCContext.h"
23 #include "llvm/MC/MCExpr.h"
24 #include "llvm/MC/MCInst.h"
25 #include "llvm/MC/MCInstrInfo.h"
26 #include "llvm/MC/MCParser/MCAsmLexer.h"
27 #include "llvm/MC/MCParser/MCAsmParser.h"
28 #include "llvm/MC/MCParser/MCParsedAsmOperand.h"
29 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
30 #include "llvm/MC/MCRegisterInfo.h"
31 #include "llvm/MC/MCSection.h"
32 #include "llvm/MC/MCStreamer.h"
33 #include "llvm/MC/MCSubtargetInfo.h"
34 #include "llvm/MC/MCSymbol.h"
35 #include "llvm/MC/TargetRegistry.h"
36 #include "llvm/Support/CommandLine.h"
37 #include "llvm/Support/Compiler.h"
38 #include "llvm/Support/SourceMgr.h"
39 #include "llvm/Support/raw_ostream.h"
40 #include <algorithm>
41 #include <memory>
42
43 using namespace llvm;
44
// Hidden command-line flag enabling the experimental Load Value Injection
// (LVI) hardening of inline-assembly code; consulted by the LVI mitigation
// hooks declared at the end of this class.
static cl::opt<bool> LVIInlineAsmHardening(
    "x86-experimental-lvi-inline-asm-hardening",
    cl::desc("Harden inline assembly code that may be vulnerable to Load Value"
             " Injection (LVI). This feature is experimental."), cl::Hidden);
49
checkScale(unsigned Scale,StringRef & ErrMsg)50 static bool checkScale(unsigned Scale, StringRef &ErrMsg) {
51 if (Scale != 1 && Scale != 2 && Scale != 4 && Scale != 8) {
52 ErrMsg = "scale factor in address must be 1, 2, 4 or 8";
53 return true;
54 }
55 return false;
56 }
57
58 namespace {
59
// Binding strength of each calculator operator, indexed by InfixCalculatorTok
// (declared below in X86AsmParser) — the entry order here must stay in sync
// with that enum.  Higher value binds tighter; IC_IMM/IC_REGISTER are operands
// and IC_LPAREN/IC_RPAREN are handled specially by pushOperator()/execute().
static const char OpPrecedence[] = {
    0,  // IC_OR
    1,  // IC_XOR
    2,  // IC_AND
    4,  // IC_LSHIFT
    4,  // IC_RSHIFT
    5,  // IC_PLUS
    5,  // IC_MINUS
    6,  // IC_MULTIPLY
    6,  // IC_DIVIDE
    6,  // IC_MOD
    7,  // IC_NOT
    8,  // IC_NEG
    9,  // IC_RPAREN
    10, // IC_LPAREN
    0,  // IC_IMM
    0,  // IC_REGISTER
    3,  // IC_EQ
    3,  // IC_NE
    3,  // IC_LT
    3,  // IC_LE
    3,  // IC_GT
    3   // IC_GE
};
84
85 class X86AsmParser : public MCTargetAsmParser {
  // Per-instruction parse info; set elsewhere in this file (not owned here).
  ParseInstructionInfo *InstInfo;
  // True under ".code16gcc": instructions are matched against the 32-bit
  // tables while the streamer stays in 16-bit mode (see MatchInstruction).
  bool Code16GCC;
  // Nonzero when a data-size prefix has been forced for the current
  // instruction — presumably set by a pseudo-prefix parser later in the
  // file; TODO confirm against the ParseInstruction implementation.
  unsigned ForcedDataPrefix = 0;

  // Which encoding ({vex}/{vex2}/{vex3}/{evex}) the user explicitly requested
  // for the current instruction, if any.
  enum VEXEncoding {
    VEXEncoding_Default,
    VEXEncoding_VEX,
    VEXEncoding_VEX2,
    VEXEncoding_VEX3,
    VEXEncoding_EVEX,
  };

  VEXEncoding ForcedVEXEncoding = VEXEncoding_Default;

  // Which displacement size ({disp8}/{disp32}) the user explicitly requested
  // for the current instruction, if any.
  enum DispEncoding {
    DispEncoding_Default,
    DispEncoding_Disp8,
    DispEncoding_Disp32,
  };

  DispEncoding ForcedDispEncoding = DispEncoding_Default;
107
108 private:
consumeToken()109 SMLoc consumeToken() {
110 MCAsmParser &Parser = getParser();
111 SMLoc Result = Parser.getTok().getLoc();
112 Parser.Lex();
113 return Result;
114 }
115
getTargetStreamer()116 X86TargetStreamer &getTargetStreamer() {
117 assert(getParser().getStreamer().getTargetStreamer() &&
118 "do not have a target streamer");
119 MCTargetStreamer &TS = *getParser().getStreamer().getTargetStreamer();
120 return static_cast<X86TargetStreamer &>(TS);
121 }
122
MatchInstruction(const OperandVector & Operands,MCInst & Inst,uint64_t & ErrorInfo,FeatureBitset & MissingFeatures,bool matchingInlineAsm,unsigned VariantID=0)123 unsigned MatchInstruction(const OperandVector &Operands, MCInst &Inst,
124 uint64_t &ErrorInfo, FeatureBitset &MissingFeatures,
125 bool matchingInlineAsm, unsigned VariantID = 0) {
126 // In Code16GCC mode, match as 32-bit.
127 if (Code16GCC)
128 SwitchMode(X86::Is32Bit);
129 unsigned rv = MatchInstructionImpl(Operands, Inst, ErrorInfo,
130 MissingFeatures, matchingInlineAsm,
131 VariantID);
132 if (Code16GCC)
133 SwitchMode(X86::Is16Bit);
134 return rv;
135 }
136
  // Token kinds for the InfixCalculator below.  The enumerator order is an
  // index into the file-scope OpPrecedence table and must not change
  // independently of it.
  enum InfixCalculatorTok {
    IC_OR = 0,
    IC_XOR,
    IC_AND,
    IC_LSHIFT,
    IC_RSHIFT,
    IC_PLUS,
    IC_MINUS,
    IC_MULTIPLY,
    IC_DIVIDE,
    IC_MOD,
    IC_NOT,
    IC_NEG,
    IC_RPAREN,
    IC_LPAREN,
    IC_IMM,
    IC_REGISTER,
    IC_EQ,
    IC_NE,
    IC_LT,
    IC_LE,
    IC_GT,
    IC_GE
  };
161
  // Intel-syntax inline-asm operators recognized by
  // IdentifyIntelInlineAsmOperator (LENGTH/SIZE/TYPE).
  enum IntelOperatorKind {
    IOK_INVALID = 0,
    IOK_LENGTH,
    IOK_SIZE,
    IOK_TYPE,
  };
168
  // MASM operators recognized by IdentifyMasmOperator
  // (LENGTHOF/SIZEOF/TYPE).
  enum MasmOperatorKind {
    MOK_INVALID = 0,
    MOK_LENGTHOF,
    MOK_SIZEOF,
    MOK_TYPE,
  };
175
176 class InfixCalculator {
    // A calculator token paired with its payload; the int64_t is meaningful
    // only for IC_IMM (the immediate) and IC_REGISTER entries, and is 0 for
    // operators.
    typedef std::pair< InfixCalculatorTok, int64_t > ICToken;
    // Pending operators awaiting conversion to postfix order.
    SmallVector<InfixCalculatorTok, 4> InfixOperatorStack;
    // Expression in postfix (RPN) order, evaluated by execute().
    SmallVector<ICToken, 4> PostfixStack;

    // NEG and NOT are the only unary operators this calculator handles.
    bool isUnaryOperator(InfixCalculatorTok Op) const {
      return Op == IC_NEG || Op == IC_NOT;
    }
184
185 public:
    // Pop the most recent operand from the postfix stack and return its
    // value.  If the top entry is not an operand, -1 is returned; the caller
    // (onRegister, when extracting a scale) relies on checkScale rejecting
    // that value.
    int64_t popOperand() {
      assert (!PostfixStack.empty() && "Poped an empty stack!");
      ICToken Op = PostfixStack.pop_back_val();
      if (!(Op.first == IC_IMM || Op.first == IC_REGISTER))
        return -1; // The invalid Scale value will be caught later by checkScale
      return Op.second;
    }
    // Push an immediate (with its value) or a register placeholder onto the
    // postfix stack.
    void pushOperand(InfixCalculatorTok Op, int64_t Val = 0) {
      assert ((Op == IC_IMM || Op == IC_REGISTER) &&
              "Unexpected operand!");
      PostfixStack.push_back(std::make_pair(Op, Val));
    }
198
    // Discard the most recently pushed operator.
    void popOperator() { InfixOperatorStack.pop_back(); }
    // Shunting-yard step: accept one infix operator, spilling
    // higher-precedence operators from the operator stack onto the postfix
    // stack first.  Parentheses are kept on the operator stack and resolved
    // here and in execute().
    void pushOperator(InfixCalculatorTok Op) {
      // Push the new operator if the stack is empty.
      if (InfixOperatorStack.empty()) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // Push the new operator if it has a higher precedence than the operator
      // on the top of the stack or the operator on the top of the stack is a
      // left parentheses.
      unsigned Idx = InfixOperatorStack.size() - 1;
      InfixCalculatorTok StackOp = InfixOperatorStack[Idx];
      if (OpPrecedence[Op] > OpPrecedence[StackOp] || StackOp == IC_LPAREN) {
        InfixOperatorStack.push_back(Op);
        return;
      }

      // The operator on the top of the stack has higher precedence than the
      // new operator.  Pop operators to the postfix stack; ParenCount tracks
      // how deep inside unmatched ')' entries we currently are, so that whole
      // parenthesized groups are flushed together.
      unsigned ParenCount = 0;
      while (true) {
        // Nothing to process.
        if (InfixOperatorStack.empty())
          break;

        Idx = InfixOperatorStack.size() - 1;
        StackOp = InfixOperatorStack[Idx];
        if (!(OpPrecedence[StackOp] >= OpPrecedence[Op] || ParenCount))
          break;

        // If we have an even parentheses count and we see a left parentheses,
        // then stop processing.
        if (!ParenCount && StackOp == IC_LPAREN)
          break;

        if (StackOp == IC_RPAREN) {
          ++ParenCount;
          InfixOperatorStack.pop_back();
        } else if (StackOp == IC_LPAREN) {
          --ParenCount;
          InfixOperatorStack.pop_back();
        } else {
          InfixOperatorStack.pop_back();
          PostfixStack.push_back(std::make_pair(StackOp, 0));
        }
      }
      // Push the new operator.
      InfixOperatorStack.push_back(Op);
    }
249
    // Flush any remaining operators to postfix order, then evaluate the
    // postfix stack and return the resulting value.  MASM-style relational
    // operators (==, !=, <, <=, >, >=) evaluate to -1 for true and 0 for
    // false.  Returns 0 for an empty expression.
    int64_t execute() {
      // Push any remaining operators onto the postfix stack.
      while (!InfixOperatorStack.empty()) {
        InfixCalculatorTok StackOp = InfixOperatorStack.pop_back_val();
        if (StackOp != IC_LPAREN && StackOp != IC_RPAREN)
          PostfixStack.push_back(std::make_pair(StackOp, 0));
      }

      if (PostfixStack.empty())
        return 0;

      // Standard RPN evaluation: operands are pushed, operators pop their
      // argument(s) and push the folded immediate back.
      SmallVector<ICToken, 16> OperandStack;
      for (unsigned i = 0, e = PostfixStack.size(); i != e; ++i) {
        ICToken Op = PostfixStack[i];
        if (Op.first == IC_IMM || Op.first == IC_REGISTER) {
          OperandStack.push_back(Op);
        } else if (isUnaryOperator(Op.first)) {
          assert (OperandStack.size() > 0 && "Too few operands.");
          ICToken Operand = OperandStack.pop_back_val();
          assert (Operand.first == IC_IMM &&
                  "Unary operation with a register!");
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_NEG:
            OperandStack.push_back(std::make_pair(IC_IMM, -Operand.second));
            break;
          case IC_NOT:
            OperandStack.push_back(std::make_pair(IC_IMM, ~Operand.second));
            break;
          }
        } else {
          assert (OperandStack.size() > 1 && "Too few operands.");
          int64_t Val;
          ICToken Op2 = OperandStack.pop_back_val();
          ICToken Op1 = OperandStack.pop_back_val();
          switch (Op.first) {
          default:
            report_fatal_error("Unexpected operator!");
            break;
          case IC_PLUS:
            // +/- are the only binary operators allowed to mix registers and
            // immediates (base/index arithmetic), hence no IC_IMM asserts.
            Val = Op1.second + Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MINUS:
            Val = Op1.second - Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MULTIPLY:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Multiply operation with an immediate and a register!");
            Val = Op1.second * Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_DIVIDE:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Divide operation with an immediate and a register!");
            assert (Op2.second != 0 && "Division by zero!");
            Val = Op1.second / Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_MOD:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Modulo operation with an immediate and a register!");
            Val = Op1.second % Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_OR:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Or operation with an immediate and a register!");
            Val = Op1.second | Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_XOR:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
              "Xor operation with an immediate and a register!");
            Val = Op1.second ^ Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_AND:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "And operation with an immediate and a register!");
            Val = Op1.second & Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Left shift operation with an immediate and a register!");
            Val = Op1.second << Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_RSHIFT:
            assert (Op1.first == IC_IMM && Op2.first == IC_IMM &&
                    "Right shift operation with an immediate and a register!");
            Val = Op1.second >> Op2.second;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_EQ:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Equals operation with an immediate and a register!");
            Val = (Op1.second == Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_NE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Not-equals operation with an immediate and a register!");
            Val = (Op1.second != Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than operation with an immediate and a register!");
            Val = (Op1.second < Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_LE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Less-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second <= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GT:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than operation with an immediate and a register!");
            Val = (Op1.second > Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          case IC_GE:
            assert(Op1.first == IC_IMM && Op2.first == IC_IMM &&
                   "Greater-than-or-equal operation with an immediate and a "
                   "register!");
            Val = (Op1.second >= Op2.second) ? -1 : 0;
            OperandStack.push_back(std::make_pair(IC_IMM, Val));
            break;
          }
        }
      }
      assert (OperandStack.size() == 1 && "Expected a single result.");
      return OperandStack.pop_back_val().second;
    }
392 };
393
  // States of the Intel-syntax expression state machine below: mostly one
  // state per token class just consumed.  IES_ERROR is sticky — once entered
  // it is only observable via IntelExprStateMachine::hadError().
  enum IntelExprState {
    IES_INIT,
    IES_OR,
    IES_XOR,
    IES_AND,
    IES_EQ,
    IES_NE,
    IES_LT,
    IES_LE,
    IES_GT,
    IES_GE,
    IES_LSHIFT,
    IES_RSHIFT,
    IES_PLUS,
    IES_MINUS,
    IES_OFFSET,
    IES_CAST,
    IES_NOT,
    IES_MULTIPLY,
    IES_DIVIDE,
    IES_MOD,
    IES_LBRAC,
    IES_RBRAC,
    IES_LPAREN,
    IES_RPAREN,
    IES_REGISTER,
    IES_INTEGER,
    IES_IDENTIFIER,
    IES_ERROR
  };
424
425 class IntelExprStateMachine {
    // Current and previous parser states; several transitions consult
    // PrevState to distinguish e.g. "reg*scale" from "scale*reg".
    IntelExprState State = IES_INIT, PrevState = IES_ERROR;
    // Memory-operand components collected so far.  TmpReg holds the most
    // recently seen register until +, -, ], or * decides whether it is the
    // base or the index.
    unsigned BaseReg = 0, IndexReg = 0, TmpReg = 0, Scale = 0;
    // Displacement accumulated outside the calculator via addImm().
    int64_t Imm = 0;
    // At most one symbol reference is allowed per operand (see setSymRef).
    const MCExpr *Sym = nullptr;
    StringRef SymName;
    // Folds the purely numeric part of the expression.
    InfixCalculator IC;
    InlineAsmIdentifierInfo Info;
    // Bracket nesting depth; onLBrac rejects nested '['.
    short BracCount = 0;
    bool MemExpr = false;
    bool OffsetOperator = false;
    // See setAppendAfterOperand(): expression follows an [OperandIdx].
    bool AttachToOperandIdx = false;
    bool IsPIC = false;
    SMLoc OffsetOperatorLoc;
    // Type info from the last cast or identifier (name/size/element/length).
    AsmTypeInfo CurType;
440
    // Record the (single) symbol referenced by this operand.  Returns true
    // with ErrMsg set if a symbol was already recorded.
    bool setSymRef(const MCExpr *Val, StringRef ID, StringRef &ErrMsg) {
      if (Sym) {
        ErrMsg = "cannot use more than one symbol in memory operand";
        return true;
      }
      Sym = Val;
      SymName = ID;
      return false;
    }
450
451 public:
    IntelExprStateMachine() = default;

    // Accumulate an extra displacement outside the calculator.
    void addImm(int64_t imm) { Imm += imm; }
    short getBracCount() const { return BracCount; }
    bool isMemExpr() const { return MemExpr; }
    bool isOffsetOperator() const { return OffsetOperator; }
    SMLoc getOffsetLoc() const { return OffsetOperatorLoc; }
    unsigned getBaseReg() const { return BaseReg; }
    unsigned getIndexReg() const { return IndexReg; }
    unsigned getScale() const { return Scale; }
    const MCExpr *getSym() const { return Sym; }
    StringRef getSymName() const { return SymName; }
    StringRef getType() const { return CurType.Name; }
    unsigned getSize() const { return CurType.Size; }
    unsigned getElementSize() const { return CurType.ElementSize; }
    unsigned getLength() const { return CurType.Length; }
    // Evaluate the collected expression; not const because it flushes the
    // calculator's stacks.
    int64_t getImm() { return Imm + IC.execute(); }
    // A well-formed expression ends on a closing ']' or a value.
    bool isValidEndState() const {
      return State == IES_RBRAC || State == IES_INTEGER;
    }

    // Is the intel expression appended after an operand index.
    // [OperandIdx][Intel Expression]
    // This is necessary for checking if it is an independent
    // intel expression at back end when parse inline asm.
    void setAppendAfterOperand() { AttachToOperandIdx = true; }

    bool isPIC() const { return IsPIC; }
    void setPIC() { IsPIC = true; }

    bool hadError() const { return State == IES_ERROR; }
    const InlineAsmIdentifierInfo &getIdentifierInfo() const { return Info; }
484
    // Diagnose a third register appearing in a memory expression.  Always
    // returns true so callers can write "return regsUseUpError(ErrMsg);".
    bool regsUseUpError(StringRef &ErrMsg) {
      // This case mostly happens in inline asm, e.g. Arr[BaseReg + IndexReg]:
      // we cannot introduce an additional register in inline asm in PIC model.
      if (IsPIC && AttachToOperandIdx)
        ErrMsg = "Don't use 2 or more regs for mem offset in PIC model!";
      else
        ErrMsg = "BaseReg/IndexReg already set!";
      return true;
    }
494
onOr()495 void onOr() {
496 IntelExprState CurrState = State;
497 switch (State) {
498 default:
499 State = IES_ERROR;
500 break;
501 case IES_INTEGER:
502 case IES_RPAREN:
503 case IES_REGISTER:
504 State = IES_OR;
505 IC.pushOperator(IC_OR);
506 break;
507 }
508 PrevState = CurrState;
509 }
onXor()510 void onXor() {
511 IntelExprState CurrState = State;
512 switch (State) {
513 default:
514 State = IES_ERROR;
515 break;
516 case IES_INTEGER:
517 case IES_RPAREN:
518 case IES_REGISTER:
519 State = IES_XOR;
520 IC.pushOperator(IC_XOR);
521 break;
522 }
523 PrevState = CurrState;
524 }
onAnd()525 void onAnd() {
526 IntelExprState CurrState = State;
527 switch (State) {
528 default:
529 State = IES_ERROR;
530 break;
531 case IES_INTEGER:
532 case IES_RPAREN:
533 case IES_REGISTER:
534 State = IES_AND;
535 IC.pushOperator(IC_AND);
536 break;
537 }
538 PrevState = CurrState;
539 }
onEq()540 void onEq() {
541 IntelExprState CurrState = State;
542 switch (State) {
543 default:
544 State = IES_ERROR;
545 break;
546 case IES_INTEGER:
547 case IES_RPAREN:
548 case IES_REGISTER:
549 State = IES_EQ;
550 IC.pushOperator(IC_EQ);
551 break;
552 }
553 PrevState = CurrState;
554 }
onNE()555 void onNE() {
556 IntelExprState CurrState = State;
557 switch (State) {
558 default:
559 State = IES_ERROR;
560 break;
561 case IES_INTEGER:
562 case IES_RPAREN:
563 case IES_REGISTER:
564 State = IES_NE;
565 IC.pushOperator(IC_NE);
566 break;
567 }
568 PrevState = CurrState;
569 }
onLT()570 void onLT() {
571 IntelExprState CurrState = State;
572 switch (State) {
573 default:
574 State = IES_ERROR;
575 break;
576 case IES_INTEGER:
577 case IES_RPAREN:
578 case IES_REGISTER:
579 State = IES_LT;
580 IC.pushOperator(IC_LT);
581 break;
582 }
583 PrevState = CurrState;
584 }
onLE()585 void onLE() {
586 IntelExprState CurrState = State;
587 switch (State) {
588 default:
589 State = IES_ERROR;
590 break;
591 case IES_INTEGER:
592 case IES_RPAREN:
593 case IES_REGISTER:
594 State = IES_LE;
595 IC.pushOperator(IC_LE);
596 break;
597 }
598 PrevState = CurrState;
599 }
onGT()600 void onGT() {
601 IntelExprState CurrState = State;
602 switch (State) {
603 default:
604 State = IES_ERROR;
605 break;
606 case IES_INTEGER:
607 case IES_RPAREN:
608 case IES_REGISTER:
609 State = IES_GT;
610 IC.pushOperator(IC_GT);
611 break;
612 }
613 PrevState = CurrState;
614 }
onGE()615 void onGE() {
616 IntelExprState CurrState = State;
617 switch (State) {
618 default:
619 State = IES_ERROR;
620 break;
621 case IES_INTEGER:
622 case IES_RPAREN:
623 case IES_REGISTER:
624 State = IES_GE;
625 IC.pushOperator(IC_GE);
626 break;
627 }
628 PrevState = CurrState;
629 }
onLShift()630 void onLShift() {
631 IntelExprState CurrState = State;
632 switch (State) {
633 default:
634 State = IES_ERROR;
635 break;
636 case IES_INTEGER:
637 case IES_RPAREN:
638 case IES_REGISTER:
639 State = IES_LSHIFT;
640 IC.pushOperator(IC_LSHIFT);
641 break;
642 }
643 PrevState = CurrState;
644 }
onRShift()645 void onRShift() {
646 IntelExprState CurrState = State;
647 switch (State) {
648 default:
649 State = IES_ERROR;
650 break;
651 case IES_INTEGER:
652 case IES_RPAREN:
653 case IES_REGISTER:
654 State = IES_RSHIFT;
655 IC.pushOperator(IC_RSHIFT);
656 break;
657 }
658 PrevState = CurrState;
659 }
    // Handle '+'.  Besides pushing the operator, a '+' terminates a pending
    // register (TmpReg): it becomes the base register, or the index register
    // (implicit scale) if a base is already set.  Returns true with ErrMsg
    // set on too many registers.
    bool onPlus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_RPAREN:
      case IES_REGISTER:
      case IES_OFFSET:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        // PrevState == IES_MULTIPLY means the register was part of a
        // scale*reg/reg*scale product already folded into IndexReg/Scale.
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg
          // with no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle '-', which is either binary minus (after a value) or unary
    // negate (everywhere else).  Like onPlus, it also finalizes a pending
    // register into BaseReg/IndexReg.  Returns true with ErrMsg set on a
    // negative scale or too many registers.
    bool onMinus(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_RPAREN:
      case IES_LBRAC:
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_REGISTER:
      case IES_INIT:
      case IES_OFFSET:
        State = IES_MINUS;
        // push minus operator if it is not a negate operator
        if (CurrState == IES_REGISTER || CurrState == IES_RPAREN ||
            CurrState == IES_INTEGER || CurrState == IES_RBRAC ||
            CurrState == IES_OFFSET)
          IC.pushOperator(IC_MINUS);
        else if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // We have negate operator for Scale: it's illegal
          ErrMsg = "Scale can't be negative";
          return true;
        } else
          IC.pushOperator(IC_NEG);
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg
          // with no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle unary NOT: legal after any operator or opening delimiter (i.e.
    // wherever a value could start), never directly after a value.
    void onNot() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_LBRAC:
      case IES_INIT:
        State = IES_NOT;
        IC.pushOperator(IC_NOT);
        break;
      }
      PrevState = CurrState;
    }
    // Handle a register token.  After '+', '(' or '[' it is held in TmpReg
    // until the next operator classifies it; after '*' it is the index of a
    // "scale*reg" product, whose scale is popped back out of the calculator
    // and replaced by a 0 immediate.  Returns true with ErrMsg set on error.
    bool onRegister(unsigned Reg, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_LPAREN:
      case IES_LBRAC:
        State = IES_REGISTER;
        TmpReg = Reg;
        IC.pushOperand(IC_REGISTER);
        break;
      case IES_MULTIPLY:
        // Index Register - Scale * Register
        if (PrevState == IES_INTEGER) {
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          State = IES_REGISTER;
          IndexReg = Reg;
          // Get the scale and replace the 'Scale * Register' with '0'.
          Scale = IC.popOperand();
          if (checkScale(Scale, ErrMsg))
            return true;
          IC.pushOperand(IC_IMM);
          IC.popOperator();
        } else {
          State = IES_ERROR;
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle an identifier.  Enum values (MS inline asm) and symbolic
    // constants are folded as plain integers; anything else becomes the
    // operand's symbol reference and contributes a 0 immediate to the
    // calculator.  Returns true with ErrMsg set on error.
    bool onIdentifierExpr(const MCExpr *SymRef, StringRef SymRefName,
                          const InlineAsmIdentifierInfo &IDInfo,
                          const AsmTypeInfo &Type, bool ParsingMSInlineAsm,
                          StringRef &ErrMsg) {
      // InlineAsm: Treat an enum value as an integer
      if (ParsingMSInlineAsm)
        if (IDInfo.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
          return onInteger(IDInfo.Enum.EnumVal, ErrMsg);
      // Treat a symbolic constant like an integer
      if (auto *CE = dyn_cast<MCConstantExpr>(SymRef))
        return onInteger(CE->getValue(), ErrMsg);
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_CAST:
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_INIT:
      case IES_LBRAC:
      case IES_LPAREN:
        if (setSymRef(SymRef, SymRefName, ErrMsg))
          return true;
        MemExpr = true;
        State = IES_INTEGER;
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm)
          Info = IDInfo;
        setTypeInfo(Type);
        break;
      }
      return false;
    }
    // Handle an integer literal.  In the "Register * Scale" position (a '*'
    // whose left operand was a register) the value becomes the index scale
    // and the product is removed from the calculator; otherwise it is pushed
    // as an immediate.  Returns true with ErrMsg set on error.
    bool onInteger(int64_t TmpInt, StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_MULTIPLY:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_INTEGER;
        if (PrevState == IES_REGISTER && CurrState == IES_MULTIPLY) {
          // Index Register - Register * Scale
          if (IndexReg)
            return regsUseUpError(ErrMsg);
          IndexReg = TmpReg;
          Scale = TmpInt;
          if (checkScale(Scale, ErrMsg))
            return true;
          // Get the scale and replace the 'Register * Scale' with '0'.
          IC.popOperator();
        } else {
          IC.pushOperand(IC_IMM, TmpInt);
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
onStar()894 void onStar() {
895 PrevState = State;
896 switch (State) {
897 default:
898 State = IES_ERROR;
899 break;
900 case IES_INTEGER:
901 case IES_REGISTER:
902 case IES_RPAREN:
903 State = IES_MULTIPLY;
904 IC.pushOperator(IC_MULTIPLY);
905 break;
906 }
907 }
onDivide()908 void onDivide() {
909 PrevState = State;
910 switch (State) {
911 default:
912 State = IES_ERROR;
913 break;
914 case IES_INTEGER:
915 case IES_RPAREN:
916 State = IES_DIVIDE;
917 IC.pushOperator(IC_DIVIDE);
918 break;
919 }
920 }
onMod()921 void onMod() {
922 PrevState = State;
923 switch (State) {
924 default:
925 State = IES_ERROR;
926 break;
927 case IES_INTEGER:
928 case IES_RPAREN:
929 State = IES_MOD;
930 IC.pushOperator(IC_MOD);
931 break;
932 }
933 }
    // Handle '['.  Nested brackets are rejected (returns true, no ErrMsg —
    // the caller supplies the diagnostic).  A '[' following a value acts as
    // an implicit '+' (e.g. "sym[4]"); following INIT/CAST it just opens the
    // memory expression.
    bool onLBrac() {
      if (BracCount)
        return true;
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_RBRAC:
      case IES_INTEGER:
      case IES_RPAREN:
        State = IES_PLUS;
        IC.pushOperator(IC_PLUS);
        CurType.Length = 1;
        CurType.Size = CurType.ElementSize;
        break;
      case IES_INIT:
      case IES_CAST:
        assert(!BracCount && "BracCount should be zero on parsing's start");
        State = IES_LBRAC;
        break;
      }
      MemExpr = true;
      BracCount++;
      return false;
    }
    // Handle ']'.  Must close exactly the one open bracket; also finalizes a
    // pending register (TmpReg) into BaseReg/IndexReg, mirroring onPlus.
    // Returns true with ErrMsg set on error.
    bool onRBrac(StringRef &ErrMsg) {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RPAREN:
        if (BracCount-- != 1) {
          ErrMsg = "unexpected bracket encountered";
          return true;
        }
        State = IES_RBRAC;
        if (CurrState == IES_REGISTER && PrevState != IES_MULTIPLY) {
          // If we already have a BaseReg, then assume this is the IndexReg
          // with no explicit scale.
          if (!BaseReg) {
            BaseReg = TmpReg;
          } else {
            if (IndexReg)
              return regsUseUpError(ErrMsg);
            IndexReg = TmpReg;
            Scale = 0;
          }
        }
        break;
      }
      PrevState = CurrState;
      return false;
    }
    // Handle '(': legal wherever a value could start (after any operator or
    // opening delimiter).
    void onLParen() {
      IntelExprState CurrState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_PLUS:
      case IES_MINUS:
      case IES_NOT:
      case IES_OR:
      case IES_XOR:
      case IES_AND:
      case IES_EQ:
      case IES_NE:
      case IES_LT:
      case IES_LE:
      case IES_GT:
      case IES_GE:
      case IES_LSHIFT:
      case IES_RSHIFT:
      case IES_MULTIPLY:
      case IES_DIVIDE:
      case IES_MOD:
      case IES_LPAREN:
      case IES_INIT:
      case IES_LBRAC:
        State = IES_LPAREN;
        IC.pushOperator(IC_LPAREN);
        break;
      }
      PrevState = CurrState;
    }
    // Handle ')': legal only after a completed value or another ')'.
    void onRParen() {
      PrevState = State;
      switch (State) {
      default:
        State = IES_ERROR;
        break;
      case IES_INTEGER:
      case IES_OFFSET:
      case IES_REGISTER:
      case IES_RBRAC:
      case IES_RPAREN:
        State = IES_RPAREN;
        IC.pushOperator(IC_RPAREN);
        break;
      }
    }
    // Handle the "offset <id>" operator: records the symbol and the
    // operator's location, and pushes a 0 placeholder since the real value
    // (the symbol's offset) cannot be computed here.  Returns true with
    // ErrMsg set on error.
    bool onOffset(const MCExpr *Val, SMLoc OffsetLoc, StringRef ID,
                  const InlineAsmIdentifierInfo &IDInfo,
                  bool ParsingMSInlineAsm, StringRef &ErrMsg) {
      PrevState = State;
      switch (State) {
      default:
        ErrMsg = "unexpected offset operator expression";
        return true;
      case IES_PLUS:
      case IES_INIT:
      case IES_LBRAC:
        if (setSymRef(Val, ID, ErrMsg))
          return true;
        OffsetOperator = true;
        OffsetOperatorLoc = OffsetLoc;
        State = IES_OFFSET;
        // As we cannot yet resolve the actual value (offset), we retain
        // the requested semantics by pushing a '0' to the operands stack
        IC.pushOperand(IC_IMM);
        if (ParsingMSInlineAsm) {
          Info = IDInfo;
        }
        break;
      }
      return false;
    }
onCast(AsmTypeInfo Info)1066 void onCast(AsmTypeInfo Info) {
1067 PrevState = State;
1068 switch (State) {
1069 default:
1070 State = IES_ERROR;
1071 break;
1072 case IES_LPAREN:
1073 setTypeInfo(Info);
1074 State = IES_CAST;
1075 break;
1076 }
1077 }
setTypeInfo(AsmTypeInfo Type)1078 void setTypeInfo(AsmTypeInfo Type) { CurType = Type; }
1079 };
1080
Error(SMLoc L,const Twine & Msg,SMRange Range=std::nullopt,bool MatchingInlineAsm=false)1081 bool Error(SMLoc L, const Twine &Msg, SMRange Range = std::nullopt,
1082 bool MatchingInlineAsm = false) {
1083 MCAsmParser &Parser = getParser();
1084 if (MatchingInlineAsm) {
1085 if (!getLexer().isAtStartOfStatement())
1086 Parser.eatToEndOfStatement();
1087 return false;
1088 }
1089 return Parser.Error(L, Msg, Range);
1090 }
1091
  // Register-name lookup and register-token parsing (defined later in the
  // file).
  bool MatchRegisterByName(MCRegister &RegNo, StringRef RegName, SMLoc StartLoc,
                           SMLoc EndLoc);
  bool ParseRegister(MCRegister &RegNo, SMLoc &StartLoc, SMLoc &EndLoc,
                     bool RestoreOnFailure);

  // Implicit SI/DI-based memory operands and operand fix-ups for string
  // instructions — presumably; confirm against the definitions.
  std::unique_ptr<X86Operand> DefaultMemSIOperand(SMLoc Loc);
  std::unique_ptr<X86Operand> DefaultMemDIOperand(SMLoc Loc);
  bool IsSIReg(unsigned Reg);
  unsigned GetSIDIForRegClass(unsigned RegClassID, unsigned Reg, bool IsSIReg);
  void
  AddDefaultSrcDestOperands(OperandVector &Operands,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
                            std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst);
  bool VerifyAndAdjustOperands(OperandVector &OrigOperands,
                               OperandVector &FinalOperands);

  // Operand parsing: AT&T and Intel syntax entry points plus Intel/MASM
  // operator helpers used by the IntelExprStateMachine above.
  bool parseOperand(OperandVector &Operands, StringRef Name);
  bool parseATTOperand(OperandVector &Operands);
  bool parseIntelOperand(OperandVector &Operands, StringRef Name);
  bool ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
                                InlineAsmIdentifierInfo &Info, SMLoc &End);
  bool ParseIntelDotOperator(IntelExprStateMachine &SM, SMLoc &End);
  unsigned IdentifyIntelInlineAsmOperator(StringRef Name);
  unsigned ParseIntelInlineAsmOperator(unsigned OpKind);
  unsigned IdentifyMasmOperator(StringRef Name);
  bool ParseMasmOperator(unsigned OpKind, int64_t &Val);
  bool ParseRoundingModeOp(SMLoc Start, OperandVector &Operands);
  bool ParseIntelNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                               bool &ParseError, SMLoc &End);
  bool ParseMasmNamedOperator(StringRef Name, IntelExprStateMachine &SM,
                              bool &ParseError, SMLoc &End);
  void RewriteIntelExpression(IntelExprStateMachine &SM, SMLoc Start,
                              SMLoc End);
  bool ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End);
  bool ParseIntelInlineAsmIdentifier(const MCExpr *&Val, StringRef &Identifier,
                                     InlineAsmIdentifierInfo &Info,
                                     bool IsUnevaluatedOperand, SMLoc &End,
                                     bool IsParsingOffsetOperator = false);
  void tryParseOperandIdx(AsmToken::TokenKind PrevTK,
                          IntelExprStateMachine &SM);

  bool ParseMemOperand(unsigned SegReg, const MCExpr *Disp, SMLoc StartLoc,
                       SMLoc EndLoc, OperandVector &Operands);

  X86::CondCode ParseConditionCode(StringRef CCode);

  bool ParseIntelMemoryOperandSize(unsigned &Size);
  bool CreateMemForMSInlineAsm(unsigned SegReg, const MCExpr *Disp,
                               unsigned BaseReg, unsigned IndexReg,
                               unsigned Scale, SMLoc Start, SMLoc End,
                               unsigned Size, StringRef Identifier,
                               const InlineAsmIdentifierInfo &Info,
                               OperandVector &Operands);

  // Assembler directives (.arch/.nops/.even/.code16 etc.).
  bool parseDirectiveArch();
  bool parseDirectiveNops(SMLoc L);
  bool parseDirectiveEven(SMLoc L);
  bool ParseDirectiveCode(StringRef IDVal, SMLoc L);

  /// CodeView FPO data directives.
  bool parseDirectiveFPOProc(SMLoc L);
  bool parseDirectiveFPOSetFrame(SMLoc L);
  bool parseDirectiveFPOPushReg(SMLoc L);
  bool parseDirectiveFPOStackAlloc(SMLoc L);
  bool parseDirectiveFPOStackAlign(SMLoc L);
  bool parseDirectiveFPOEndPrologue(SMLoc L);
  bool parseDirectiveFPOEndProc(SMLoc L);

  /// SEH directives.
  bool parseSEHRegisterNumber(unsigned RegClassID, MCRegister &RegNo);
  bool parseDirectiveSEHPushReg(SMLoc);
  bool parseDirectiveSEHSetFrame(SMLoc);
  bool parseDirectiveSEHSaveReg(SMLoc);
  bool parseDirectiveSEHSaveXMM(SMLoc);
  bool parseDirectiveSEHPushFrame(SMLoc);

  unsigned checkTargetMatchPredicate(MCInst &Inst) override;

  bool validateInstruction(MCInst &Inst, const OperandVector &Ops);
  bool processInstruction(MCInst &Inst, const OperandVector &Ops);

  // Load Value Injection (LVI) Mitigations for machine code
  void emitWarningForSpecialLVIInstruction(SMLoc Loc);
  void applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out);
  void applyLVILoadHardeningMitigation(MCInst &Inst, MCStreamer &Out);
1176
1177 /// Wrapper around MCStreamer::emitInstruction(). Possibly adds
1178 /// instrumentation around Inst.
1179 void emitInstruction(MCInst &Inst, OperandVector &Operands, MCStreamer &Out);
1180
1181 bool MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
1182 OperandVector &Operands, MCStreamer &Out,
1183 uint64_t &ErrorInfo,
1184 bool MatchingInlineAsm) override;
1185
1186 void MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op, OperandVector &Operands,
1187 MCStreamer &Out, bool MatchingInlineAsm);
1188
1189 bool ErrorMissingFeature(SMLoc IDLoc, const FeatureBitset &MissingFeatures,
1190 bool MatchingInlineAsm);
1191
1192 bool MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
1193 OperandVector &Operands, MCStreamer &Out,
1194 uint64_t &ErrorInfo,
1195 bool MatchingInlineAsm);
1196
1197 bool MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
1198 OperandVector &Operands, MCStreamer &Out,
1199 uint64_t &ErrorInfo,
1200 bool MatchingInlineAsm);
1201
1202 bool OmitRegisterFromClobberLists(unsigned RegNo) override;
1203
1204 /// Parses AVX512 specific operand primitives: masked registers ({%k<NUM>}, {z})
1205 /// and memory broadcasting ({1to<NUM>}) primitives, updating Operands vector if required.
1206 /// return false if no parsing errors occurred, true otherwise.
1207 bool HandleAVX512Operand(OperandVector &Operands);
1208
1209 bool ParseZ(std::unique_ptr<X86Operand> &Z, const SMLoc &StartLoc);
1210
is64BitMode() const1211 bool is64BitMode() const {
1212 // FIXME: Can tablegen auto-generate this?
1213 return getSTI().getFeatureBits()[X86::Is64Bit];
1214 }
is32BitMode() const1215 bool is32BitMode() const {
1216 // FIXME: Can tablegen auto-generate this?
1217 return getSTI().getFeatureBits()[X86::Is32Bit];
1218 }
is16BitMode() const1219 bool is16BitMode() const {
1220 // FIXME: Can tablegen auto-generate this?
1221 return getSTI().getFeatureBits()[X86::Is16Bit];
1222 }
  /// Switch the subtarget to exactly one of the three code modes
  /// (X86::Is16Bit / Is32Bit / Is64Bit) and recompute available features.
  void SwitchMode(unsigned mode) {
    MCSubtargetInfo &STI = copySTI();
    FeatureBitset AllModes({X86::Is64Bit, X86::Is32Bit, X86::Is16Bit});
    FeatureBitset OldMode = STI.getFeatureBits() & AllModes;
    // OldMode.flip(mode) is the set of mode bits whose state must change:
    // the currently active mode plus the requested one. ToggleFeature flips
    // exactly those bits, leaving only `mode` set among the mode features.
    FeatureBitset FB = ComputeAvailableFeatures(
      STI.ToggleFeature(OldMode.flip(mode)));
    setAvailableFeatures(FB);

    // Exactly one mode bit must be set afterwards.
    assert(FeatureBitset({mode}) == (STI.getFeatureBits() & AllModes));
  }
1233
getPointerWidth()1234 unsigned getPointerWidth() {
1235 if (is16BitMode()) return 16;
1236 if (is32BitMode()) return 32;
1237 if (is64BitMode()) return 64;
1238 llvm_unreachable("invalid mode");
1239 }
1240
isParsingIntelSyntax()1241 bool isParsingIntelSyntax() {
1242 return getParser().getAssemblerDialect();
1243 }
1244
1245 /// @name Auto-generated Matcher Functions
1246 /// {
1247
1248 #define GET_ASSEMBLER_HEADER
1249 #include "X86GenAsmMatcher.inc"
1250
1251 /// }
1252
1253 public:
1254 enum X86MatchResultTy {
1255 Match_Unsupported = FIRST_TARGET_MATCH_RESULT_TY,
1256 #define GET_OPERAND_DIAGNOSTIC_TYPES
1257 #include "X86GenAsmMatcher.inc"
1258 };
1259
  X86AsmParser(const MCSubtargetInfo &sti, MCAsmParser &Parser,
               const MCInstrInfo &mii, const MCTargetOptions &Options)
      : MCTargetAsmParser(Options, sti, mii), InstInfo(nullptr),
        Code16GCC(false) {

    // On x86, ".word" emits 2 bytes.
    Parser.addAliasForDirective(".word", ".2byte");

    // Initialize the set of available features.
    setAvailableFeatures(ComputeAvailableFeatures(getSTI().getFeatureBits()));
  }
1270
1271 bool parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1272 SMLoc &EndLoc) override;
1273 OperandMatchResultTy tryParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
1274 SMLoc &EndLoc) override;
1275
1276 bool parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) override;
1277
1278 bool ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
1279 SMLoc NameLoc, OperandVector &Operands) override;
1280
1281 bool ParseDirective(AsmToken DirectiveID) override;
1282 };
1283 } // end anonymous namespace
1284
1285 /// @name Auto-generated Match Functions
1286 /// {
1287
1288 static unsigned MatchRegisterName(StringRef Name);
1289
1290 /// }
1291
/// Validate a memory operand's base/index register combination and scale for
/// the current mode. Returns true and sets \p ErrMsg on an invalid
/// combination; returns the result of checkScale otherwise.
static bool CheckBaseRegAndIndexRegAndScale(unsigned BaseReg, unsigned IndexReg,
                                            unsigned Scale, bool Is64BitMode,
                                            StringRef &ErrMsg) {
  // If we have both a base register and an index register make sure they are
  // both 64-bit or 32-bit registers.
  // To support VSIB, IndexReg can be 128-bit or 256-bit registers.

  // Base must be a GR16/32/64 register or RIP/EIP (register 0 means "none").
  if (BaseReg != 0 &&
      !(BaseReg == X86::RIP || BaseReg == X86::EIP ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // Index may be a GR16/32/64 register, the pseudo index-zero registers
  // EIZ/RIZ, or (for VSIB) a vector register.
  if (IndexReg != 0 &&
      !(IndexReg == X86::EIZ || IndexReg == X86::RIZ ||
        X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg))) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // RIP/EIP-relative addressing admits no index register, and IP/SP can
  // never encode as an index.
  if (((BaseReg == X86::RIP || BaseReg == X86::EIP) && IndexReg != 0) ||
      IndexReg == X86::EIP || IndexReg == X86::RIP ||
      IndexReg == X86::ESP || IndexReg == X86::RSP) {
    ErrMsg = "invalid base+index expression";
    return true;
  }

  // Check for use of invalid 16-bit registers. Only BX/BP/SI/DI are allowed,
  // and then only in non-64-bit modes.
  if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
      (Is64BitMode || (BaseReg != X86::BX && BaseReg != X86::BP &&
                       BaseReg != X86::SI && BaseReg != X86::DI))) {
    ErrMsg = "invalid 16-bit base register";
    return true;
  }

  // 16-bit addressing has no index-only encoding.
  if (BaseReg == 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg)) {
    ErrMsg = "16-bit memory operand may not include only index register";
    return true;
  }

  // When both registers are present they must agree in width (EIZ/RIZ count
  // as 32-/64-bit respectively).
  if (BaseReg != 0 && IndexReg != 0) {
    if (X86MCRegisterClasses[X86::GR64RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
         IndexReg == X86::EIZ)) {
      ErrMsg = "base register is 64-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR32RegClassID].contains(BaseReg) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg) ||
         X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg) ||
         IndexReg == X86::RIZ)) {
      ErrMsg = "base register is 32-bit, but index register is not";
      return true;
    }
    if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg)) {
      if (X86MCRegisterClasses[X86::GR32RegClassID].contains(IndexReg) ||
          X86MCRegisterClasses[X86::GR64RegClassID].contains(IndexReg)) {
        ErrMsg = "base register is 16-bit, but index register is not";
        return true;
      }
      // 16-bit addressing only encodes BX/BP + SI/DI.
      if ((BaseReg != X86::BX && BaseReg != X86::BP) ||
          (IndexReg != X86::SI && IndexReg != X86::DI)) {
        ErrMsg = "invalid 16-bit base/index register combination";
        return true;
      }
    }
  }

  // RIP/EIP-relative addressing is only supported in 64-bit mode.
  if (!Is64BitMode && BaseReg != 0 &&
      (BaseReg == X86::RIP || BaseReg == X86::EIP)) {
    ErrMsg = "IP-relative addressing requires 64-bit mode";
    return true;
  }

  return checkScale(Scale, ErrMsg);
}
1380
MatchRegisterByName(MCRegister & RegNo,StringRef RegName,SMLoc StartLoc,SMLoc EndLoc)1381 bool X86AsmParser::MatchRegisterByName(MCRegister &RegNo, StringRef RegName,
1382 SMLoc StartLoc, SMLoc EndLoc) {
1383 // If we encounter a %, ignore it. This code handles registers with and
1384 // without the prefix, unprefixed registers can occur in cfi directives.
1385 RegName.consume_front("%");
1386
1387 RegNo = MatchRegisterName(RegName);
1388
1389 // If the match failed, try the register name as lowercase.
1390 if (RegNo == 0)
1391 RegNo = MatchRegisterName(RegName.lower());
1392
1393 // The "flags" and "mxcsr" registers cannot be referenced directly.
1394 // Treat it as an identifier instead.
1395 if (isParsingMSInlineAsm() && isParsingIntelSyntax() &&
1396 (RegNo == X86::EFLAGS || RegNo == X86::MXCSR))
1397 RegNo = 0;
1398
1399 if (!is64BitMode()) {
1400 // FIXME: This should be done using Requires<Not64BitMode> and
1401 // Requires<In64BitMode> so "eiz" usage in 64-bit instructions can be also
1402 // checked.
1403 if (RegNo == X86::RIZ || RegNo == X86::RIP ||
1404 X86MCRegisterClasses[X86::GR64RegClassID].contains(RegNo) ||
1405 X86II::isX86_64NonExtLowByteReg(RegNo) ||
1406 X86II::isX86_64ExtendedReg(RegNo)) {
1407 return Error(StartLoc,
1408 "register %" + RegName + " is only available in 64-bit mode",
1409 SMRange(StartLoc, EndLoc));
1410 }
1411 }
1412
1413 // If this is "db[0-15]", match it as an alias
1414 // for dr[0-15].
1415 if (RegNo == 0 && RegName.startswith("db")) {
1416 if (RegName.size() == 3) {
1417 switch (RegName[2]) {
1418 case '0':
1419 RegNo = X86::DR0;
1420 break;
1421 case '1':
1422 RegNo = X86::DR1;
1423 break;
1424 case '2':
1425 RegNo = X86::DR2;
1426 break;
1427 case '3':
1428 RegNo = X86::DR3;
1429 break;
1430 case '4':
1431 RegNo = X86::DR4;
1432 break;
1433 case '5':
1434 RegNo = X86::DR5;
1435 break;
1436 case '6':
1437 RegNo = X86::DR6;
1438 break;
1439 case '7':
1440 RegNo = X86::DR7;
1441 break;
1442 case '8':
1443 RegNo = X86::DR8;
1444 break;
1445 case '9':
1446 RegNo = X86::DR9;
1447 break;
1448 }
1449 } else if (RegName.size() == 4 && RegName[2] == '1') {
1450 switch (RegName[3]) {
1451 case '0':
1452 RegNo = X86::DR10;
1453 break;
1454 case '1':
1455 RegNo = X86::DR11;
1456 break;
1457 case '2':
1458 RegNo = X86::DR12;
1459 break;
1460 case '3':
1461 RegNo = X86::DR13;
1462 break;
1463 case '4':
1464 RegNo = X86::DR14;
1465 break;
1466 case '5':
1467 RegNo = X86::DR15;
1468 break;
1469 }
1470 }
1471 }
1472
1473 if (RegNo == 0) {
1474 if (isParsingIntelSyntax())
1475 return true;
1476 return Error(StartLoc, "invalid register name", SMRange(StartLoc, EndLoc));
1477 }
1478 return false;
1479 }
1480
/// Parse a register reference. When \p RestoreOnFailure is set, every token
/// consumed is remembered and pushed back onto the lexer on failure, so the
/// caller can treat the attempt as purely speculative.
bool X86AsmParser::ParseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc, bool RestoreOnFailure) {
  MCAsmParser &Parser = getParser();
  MCAsmLexer &Lexer = getLexer();
  RegNo = 0;

  // Tokens consumed so far; replayed in reverse by OnFailure when
  // RestoreOnFailure is requested.
  SmallVector<AsmToken, 5> Tokens;
  auto OnFailure = [RestoreOnFailure, &Lexer, &Tokens]() {
    if (RestoreOnFailure) {
      while (!Tokens.empty()) {
        Lexer.UnLex(Tokens.pop_back_val());
      }
    }
  };

  const AsmToken &PercentTok = Parser.getTok();
  StartLoc = PercentTok.getLoc();

  // If we encounter a %, ignore it. This code handles registers with and
  // without the prefix, unprefixed registers can occur in cfi directives.
  if (!isParsingIntelSyntax() && PercentTok.is(AsmToken::Percent)) {
    Tokens.push_back(PercentTok);
    Parser.Lex(); // Eat percent token.
  }

  const AsmToken &Tok = Parser.getTok();
  EndLoc = Tok.getEndLoc();

  if (Tok.isNot(AsmToken::Identifier)) {
    OnFailure();
    // In Intel syntax a non-identifier is simply "not a register", not a
    // diagnostic-worthy error.
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  if (MatchRegisterByName(RegNo, Tok.getString(), StartLoc, EndLoc)) {
    OnFailure();
    return true;
  }

  // Parse "%st" as "%st(0)" and "%st(1)", which is multiple tokens.
  if (RegNo == X86::ST0) {
    Tokens.push_back(Tok);
    Parser.Lex(); // Eat 'st'

    // Check to see if we have '(4)' after %st.
    if (Lexer.isNot(AsmToken::LParen))
      return false; // Bare "%st" is ST0.
    // Lex the paren.
    Tokens.push_back(Parser.getTok());
    Parser.Lex();

    const AsmToken &IntTok = Parser.getTok();
    if (IntTok.isNot(AsmToken::Integer)) {
      OnFailure();
      return Error(IntTok.getLoc(), "expected stack index");
    }
    switch (IntTok.getIntVal()) {
    case 0: RegNo = X86::ST0; break;
    case 1: RegNo = X86::ST1; break;
    case 2: RegNo = X86::ST2; break;
    case 3: RegNo = X86::ST3; break;
    case 4: RegNo = X86::ST4; break;
    case 5: RegNo = X86::ST5; break;
    case 6: RegNo = X86::ST6; break;
    case 7: RegNo = X86::ST7; break;
    default:
      OnFailure();
      return Error(IntTok.getLoc(), "invalid stack index");
    }

    // Lex IntTok
    Tokens.push_back(IntTok);
    Parser.Lex();
    if (Lexer.isNot(AsmToken::RParen)) {
      OnFailure();
      return Error(Parser.getTok().getLoc(), "expected ')'");
    }

    EndLoc = Parser.getTok().getEndLoc();
    Parser.Lex(); // Eat ')'
    return false;
  }

  EndLoc = Parser.getTok().getEndLoc();

  if (RegNo == 0) {
    OnFailure();
    if (isParsingIntelSyntax()) return true;
    return Error(StartLoc, "invalid register name",
                 SMRange(StartLoc, EndLoc));
  }

  Parser.Lex(); // Eat identifier token.
  return false;
}
1577
/// MCTargetAsmParser entry point: parse a register with hard-failure
/// semantics (consumed tokens are not restored; errors are reported).
bool X86AsmParser::parseRegister(MCRegister &RegNo, SMLoc &StartLoc,
                                 SMLoc &EndLoc) {
  return ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/false);
}
1582
tryParseRegister(MCRegister & RegNo,SMLoc & StartLoc,SMLoc & EndLoc)1583 OperandMatchResultTy X86AsmParser::tryParseRegister(MCRegister &RegNo,
1584 SMLoc &StartLoc,
1585 SMLoc &EndLoc) {
1586 bool Result =
1587 ParseRegister(RegNo, StartLoc, EndLoc, /*RestoreOnFailure=*/true);
1588 bool PendingErrors = getParser().hasPendingError();
1589 getParser().clearPendingErrors();
1590 if (PendingErrors)
1591 return MatchOperand_ParseFail;
1592 if (Result)
1593 return MatchOperand_NoMatch;
1594 return MatchOperand_Success;
1595 }
1596
DefaultMemSIOperand(SMLoc Loc)1597 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemSIOperand(SMLoc Loc) {
1598 bool Parse32 = is32BitMode() || Code16GCC;
1599 unsigned Basereg = is64BitMode() ? X86::RSI : (Parse32 ? X86::ESI : X86::SI);
1600 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1601 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1602 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1603 Loc, Loc, 0);
1604 }
1605
DefaultMemDIOperand(SMLoc Loc)1606 std::unique_ptr<X86Operand> X86AsmParser::DefaultMemDIOperand(SMLoc Loc) {
1607 bool Parse32 = is32BitMode() || Code16GCC;
1608 unsigned Basereg = is64BitMode() ? X86::RDI : (Parse32 ? X86::EDI : X86::DI);
1609 const MCExpr *Disp = MCConstantExpr::create(0, getContext());
1610 return X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
1611 /*BaseReg=*/Basereg, /*IndexReg=*/0, /*Scale=*/1,
1612 Loc, Loc, 0);
1613 }
1614
IsSIReg(unsigned Reg)1615 bool X86AsmParser::IsSIReg(unsigned Reg) {
1616 switch (Reg) {
1617 default: llvm_unreachable("Only (R|E)SI and (R|E)DI are expected!");
1618 case X86::RSI:
1619 case X86::ESI:
1620 case X86::SI:
1621 return true;
1622 case X86::RDI:
1623 case X86::EDI:
1624 case X86::DI:
1625 return false;
1626 }
1627 }
1628
GetSIDIForRegClass(unsigned RegClassID,unsigned Reg,bool IsSIReg)1629 unsigned X86AsmParser::GetSIDIForRegClass(unsigned RegClassID, unsigned Reg,
1630 bool IsSIReg) {
1631 switch (RegClassID) {
1632 default: llvm_unreachable("Unexpected register class");
1633 case X86::GR64RegClassID:
1634 return IsSIReg ? X86::RSI : X86::RDI;
1635 case X86::GR32RegClassID:
1636 return IsSIReg ? X86::ESI : X86::EDI;
1637 case X86::GR16RegClassID:
1638 return IsSIReg ? X86::SI : X86::DI;
1639 }
1640 }
1641
AddDefaultSrcDestOperands(OperandVector & Operands,std::unique_ptr<llvm::MCParsedAsmOperand> && Src,std::unique_ptr<llvm::MCParsedAsmOperand> && Dst)1642 void X86AsmParser::AddDefaultSrcDestOperands(
1643 OperandVector& Operands, std::unique_ptr<llvm::MCParsedAsmOperand> &&Src,
1644 std::unique_ptr<llvm::MCParsedAsmOperand> &&Dst) {
1645 if (isParsingIntelSyntax()) {
1646 Operands.push_back(std::move(Dst));
1647 Operands.push_back(std::move(Src));
1648 }
1649 else {
1650 Operands.push_back(std::move(Src));
1651 Operands.push_back(std::move(Dst));
1652 }
1653 }
1654
/// For string instructions with explicit operands (e.g. "movsb (%rsi),
/// (%rdi)"), verify the user-written operands against the instruction's
/// implicit defaults in \p FinalOperands, warn when only the size is taken
/// from them, and replace the originals with the adjusted defaults.
/// Returns true only on a hard operand mismatch error.
bool X86AsmParser::VerifyAndAdjustOperands(OperandVector &OrigOperands,
                                           OperandVector &FinalOperands) {

  if (OrigOperands.size() > 1) {
    // Check if sizes match, OrigOperands also contains the instruction name
    assert(OrigOperands.size() == FinalOperands.size() + 1 &&
           "Operand size mismatch");

    // Warnings are collected and only emitted once all operands have been
    // validated, so legal cases don't produce spurious diagnostics.
    SmallVector<std::pair<SMLoc, std::string>, 2> Warnings;
    // Verify types match
    int RegClassID = -1;
    for (unsigned int i = 0; i < FinalOperands.size(); ++i) {
      X86Operand &OrigOp = static_cast<X86Operand &>(*OrigOperands[i + 1]);
      X86Operand &FinalOp = static_cast<X86Operand &>(*FinalOperands[i]);

      if (FinalOp.isReg() &&
          (!OrigOp.isReg() || FinalOp.getReg() != OrigOp.getReg()))
        // Return false and let a normal complaint about bogus operands happen
        return false;

      if (FinalOp.isMem()) {

        if (!OrigOp.isMem())
          // Return false and let a normal complaint about bogus operands happen
          return false;

        unsigned OrigReg = OrigOp.Mem.BaseReg;
        unsigned FinalReg = FinalOp.Mem.BaseReg;

        // If we've already encountered a register class, make sure all register
        // bases are of the same register class
        if (RegClassID != -1 &&
            !X86MCRegisterClasses[RegClassID].contains(OrigReg)) {
          return Error(OrigOp.getStartLoc(),
                       "mismatching source and destination index registers");
        }

        if (X86MCRegisterClasses[X86::GR64RegClassID].contains(OrigReg))
          RegClassID = X86::GR64RegClassID;
        else if (X86MCRegisterClasses[X86::GR32RegClassID].contains(OrigReg))
          RegClassID = X86::GR32RegClassID;
        else if (X86MCRegisterClasses[X86::GR16RegClassID].contains(OrigReg))
          RegClassID = X86::GR16RegClassID;
        else
          // Unexpected register class type
          // Return false and let a normal complaint about bogus operands happen
          return false;

        bool IsSI = IsSIReg(FinalReg);
        // Re-derive the default base register in the user's register width.
        FinalReg = GetSIDIForRegClass(RegClassID, FinalReg, IsSI);

        if (FinalReg != OrigReg) {
          std::string RegName = IsSI ? "ES:(R|E)SI" : "ES:(R|E)DI";
          Warnings.push_back(std::make_pair(
              OrigOp.getStartLoc(),
              "memory operand is only for determining the size, " + RegName +
                  " will be used for the location"));
        }

        // Carry the user-specified size and segment over to the default
        // operand; the base register is the (width-adjusted) default.
        FinalOp.Mem.Size = OrigOp.Mem.Size;
        FinalOp.Mem.SegReg = OrigOp.Mem.SegReg;
        FinalOp.Mem.BaseReg = FinalReg;
      }
    }

    // Produce warnings only if all the operands passed the adjustment - prevent
    // legal cases like "movsd (%rax), %xmm0" mistakenly produce warnings
    for (auto &WarningMsg : Warnings) {
      Warning(WarningMsg.first, WarningMsg.second);
    }

    // Remove old operands
    for (unsigned int i = 0; i < FinalOperands.size(); ++i)
      OrigOperands.pop_back();
  }
  // OrigOperands.append(FinalOperands.begin(), FinalOperands.end());
  for (unsigned int i = 0; i < FinalOperands.size(); ++i)
    OrigOperands.push_back(std::move(FinalOperands[i]));

  return false;
}
1736
parseOperand(OperandVector & Operands,StringRef Name)1737 bool X86AsmParser::parseOperand(OperandVector &Operands, StringRef Name) {
1738 if (isParsingIntelSyntax())
1739 return parseIntelOperand(Operands, Name);
1740
1741 return parseATTOperand(Operands);
1742 }
1743
/// Build the memory operand for an MS inline-asm identifier reference and
/// append it to \p Operands. Always returns false (success).
bool X86AsmParser::CreateMemForMSInlineAsm(
    unsigned SegReg, const MCExpr *Disp, unsigned BaseReg, unsigned IndexReg,
    unsigned Scale, SMLoc Start, SMLoc End, unsigned Size, StringRef Identifier,
    const InlineAsmIdentifierInfo &Info, OperandVector &Operands) {
  // If we found a decl other than a VarDecl, then assume it is a FuncDecl or
  // some other label reference.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Label)) {
    // Insert an explicit size if the user didn't have one.
    if (!Size) {
      Size = getPointerWidth();
      InstInfo->AsmRewrites->emplace_back(AOK_SizeDirective, Start,
                                          /*Len=*/0, Size);
    }
    // Create an absolute memory reference in order to match against
    // instructions taking a PC relative operand.
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier,
                                             Info.Label.Decl));
    return false;
  }
  // We either have a direct symbol reference, or an offset from a symbol. The
  // parser always puts the symbol on the LHS, so look there for size
  // calculation purposes.
  unsigned FrontendSize = 0;
  void *Decl = nullptr;
  bool IsGlobalLV = false;
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
    // Size is in terms of bits in this context.
    FrontendSize = Info.Var.Type * 8;
    Decl = Info.Var.Decl;
    IsGlobalLV = Info.Var.IsGlobalLV;
  }
  // It is widely common for MS InlineAsm to use a global variable and one/two
  // registers in a memory expression, even though it is inaccessible via
  // rip/eip addressing.
  if (IsGlobalLV && (BaseReg || IndexReg)) {
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), Disp, Start,
                                             End, Size, Identifier, Decl, 0,
                                             BaseReg && IndexReg));
    return false;
  }
  // Otherwise, we set the base register to a non-zero value
  // if we don't know the actual value at this time. This is necessary to
  // get the matching correct in some cases.
  BaseReg = BaseReg ? BaseReg : 1;
  Operands.push_back(X86Operand::CreateMem(
      getPointerWidth(), SegReg, Disp, BaseReg, IndexReg, Scale, Start, End,
      Size,
      /*DefaultBaseReg=*/X86::RIP, Identifier, Decl, FrontendSize));
  return false;
}
1794
1795 // Some binary bitwise operators have a named synonymous
1796 // Query a candidate string for being such a named operator
1797 // and if so - invoke the appropriate handler
/// Returns true when \p Name is a recognized named operator and has been
/// handled (with \p ParseError reporting whether handling failed);
/// returns false when \p Name is not a named operator at all.
bool X86AsmParser::ParseIntelNamedOperator(StringRef Name,
                                           IntelExprStateMachine &SM,
                                           bool &ParseError, SMLoc &End) {
  // A named operator should be either lower or upper case, but not a mix...
  // except in MASM, which uses full case-insensitivity.
  if (Name.compare(Name.lower()) && Name.compare(Name.upper()) &&
      !getParser().isParsingMasm())
    return false;
  if (Name.equals_insensitive("not")) {
    SM.onNot();
  } else if (Name.equals_insensitive("or")) {
    SM.onOr();
  } else if (Name.equals_insensitive("shl")) {
    SM.onLShift();
  } else if (Name.equals_insensitive("shr")) {
    SM.onRShift();
  } else if (Name.equals_insensitive("xor")) {
    SM.onXor();
  } else if (Name.equals_insensitive("and")) {
    SM.onAnd();
  } else if (Name.equals_insensitive("mod")) {
    SM.onMod();
  } else if (Name.equals_insensitive("offset")) {
    // OFFSET takes an operand of its own; parse it before feeding the
    // state machine.
    SMLoc OffsetLoc = getTok().getLoc();
    const MCExpr *Val = nullptr;
    StringRef ID;
    InlineAsmIdentifierInfo Info;
    ParseError = ParseIntelOffsetOperator(Val, ID, Info, End);
    if (ParseError)
      return true;
    StringRef ErrMsg;
    ParseError =
        SM.onOffset(Val, OffsetLoc, ID, Info, isParsingMSInlineAsm(), ErrMsg);
    if (ParseError)
      return Error(SMLoc::getFromPointer(Name.data()), ErrMsg);
  } else {
    return false;
  }
  // OFFSET consumed its own tokens above; for the simple operators the
  // operator token itself still needs to be eaten here.
  if (!Name.equals_insensitive("offset"))
    End = consumeToken();
  return true;
}
ParseMasmNamedOperator(StringRef Name,IntelExprStateMachine & SM,bool & ParseError,SMLoc & End)1840 bool X86AsmParser::ParseMasmNamedOperator(StringRef Name,
1841 IntelExprStateMachine &SM,
1842 bool &ParseError, SMLoc &End) {
1843 if (Name.equals_insensitive("eq")) {
1844 SM.onEq();
1845 } else if (Name.equals_insensitive("ne")) {
1846 SM.onNE();
1847 } else if (Name.equals_insensitive("lt")) {
1848 SM.onLT();
1849 } else if (Name.equals_insensitive("le")) {
1850 SM.onLE();
1851 } else if (Name.equals_insensitive("gt")) {
1852 SM.onGT();
1853 } else if (Name.equals_insensitive("ge")) {
1854 SM.onGE();
1855 } else {
1856 return false;
1857 }
1858 End = consumeToken();
1859 return true;
1860 }
1861
1862 // Check if current intel expression append after an operand.
1863 // Like: [Operand][Intel Expression]
tryParseOperandIdx(AsmToken::TokenKind PrevTK,IntelExprStateMachine & SM)1864 void X86AsmParser::tryParseOperandIdx(AsmToken::TokenKind PrevTK,
1865 IntelExprStateMachine &SM) {
1866 if (PrevTK != AsmToken::RBrac)
1867 return;
1868
1869 SM.setAppendAfterOperand();
1870 }
1871
ParseIntelExpression(IntelExprStateMachine & SM,SMLoc & End)1872 bool X86AsmParser::ParseIntelExpression(IntelExprStateMachine &SM, SMLoc &End) {
1873 MCAsmParser &Parser = getParser();
1874 StringRef ErrMsg;
1875
1876 AsmToken::TokenKind PrevTK = AsmToken::Error;
1877
1878 if (getContext().getObjectFileInfo()->isPositionIndependent())
1879 SM.setPIC();
1880
1881 bool Done = false;
1882 while (!Done) {
1883 // Get a fresh reference on each loop iteration in case the previous
1884 // iteration moved the token storage during UnLex().
1885 const AsmToken &Tok = Parser.getTok();
1886
1887 bool UpdateLocLex = true;
1888 AsmToken::TokenKind TK = getLexer().getKind();
1889
1890 switch (TK) {
1891 default:
1892 if ((Done = SM.isValidEndState()))
1893 break;
1894 return Error(Tok.getLoc(), "unknown token in expression");
1895 case AsmToken::Error:
1896 return Error(getLexer().getErrLoc(), getLexer().getErr());
1897 break;
1898 case AsmToken::EndOfStatement:
1899 Done = true;
1900 break;
1901 case AsmToken::Real:
1902 // DotOperator: [ebx].0
1903 UpdateLocLex = false;
1904 if (ParseIntelDotOperator(SM, End))
1905 return true;
1906 break;
1907 case AsmToken::Dot:
1908 if (!Parser.isParsingMasm()) {
1909 if ((Done = SM.isValidEndState()))
1910 break;
1911 return Error(Tok.getLoc(), "unknown token in expression");
1912 }
1913 // MASM allows spaces around the dot operator (e.g., "var . x")
1914 Lex();
1915 UpdateLocLex = false;
1916 if (ParseIntelDotOperator(SM, End))
1917 return true;
1918 break;
1919 case AsmToken::Dollar:
1920 if (!Parser.isParsingMasm()) {
1921 if ((Done = SM.isValidEndState()))
1922 break;
1923 return Error(Tok.getLoc(), "unknown token in expression");
1924 }
1925 [[fallthrough]];
1926 case AsmToken::String: {
1927 if (Parser.isParsingMasm()) {
1928 // MASM parsers handle strings in expressions as constants.
1929 SMLoc ValueLoc = Tok.getLoc();
1930 int64_t Res;
1931 const MCExpr *Val;
1932 if (Parser.parsePrimaryExpr(Val, End, nullptr))
1933 return true;
1934 UpdateLocLex = false;
1935 if (!Val->evaluateAsAbsolute(Res, getStreamer().getAssemblerPtr()))
1936 return Error(ValueLoc, "expected absolute value");
1937 if (SM.onInteger(Res, ErrMsg))
1938 return Error(ValueLoc, ErrMsg);
1939 break;
1940 }
1941 [[fallthrough]];
1942 }
1943 case AsmToken::At:
1944 case AsmToken::Identifier: {
1945 SMLoc IdentLoc = Tok.getLoc();
1946 StringRef Identifier = Tok.getString();
1947 UpdateLocLex = false;
1948 if (Parser.isParsingMasm()) {
1949 size_t DotOffset = Identifier.find_first_of('.');
1950 if (DotOffset != StringRef::npos) {
1951 consumeToken();
1952 StringRef LHS = Identifier.slice(0, DotOffset);
1953 StringRef Dot = Identifier.slice(DotOffset, DotOffset + 1);
1954 StringRef RHS = Identifier.slice(DotOffset + 1, StringRef::npos);
1955 if (!RHS.empty()) {
1956 getLexer().UnLex(AsmToken(AsmToken::Identifier, RHS));
1957 }
1958 getLexer().UnLex(AsmToken(AsmToken::Dot, Dot));
1959 if (!LHS.empty()) {
1960 getLexer().UnLex(AsmToken(AsmToken::Identifier, LHS));
1961 }
1962 break;
1963 }
1964 }
1965 // (MASM only) <TYPE> PTR operator
1966 if (Parser.isParsingMasm()) {
1967 const AsmToken &NextTok = getLexer().peekTok();
1968 if (NextTok.is(AsmToken::Identifier) &&
1969 NextTok.getIdentifier().equals_insensitive("ptr")) {
1970 AsmTypeInfo Info;
1971 if (Parser.lookUpType(Identifier, Info))
1972 return Error(Tok.getLoc(), "unknown type");
1973 SM.onCast(Info);
1974 // Eat type and PTR.
1975 consumeToken();
1976 End = consumeToken();
1977 break;
1978 }
1979 }
1980 // Register, or (MASM only) <register>.<field>
1981 MCRegister Reg;
1982 if (Tok.is(AsmToken::Identifier)) {
1983 if (!ParseRegister(Reg, IdentLoc, End, /*RestoreOnFailure=*/true)) {
1984 if (SM.onRegister(Reg, ErrMsg))
1985 return Error(IdentLoc, ErrMsg);
1986 break;
1987 }
1988 if (Parser.isParsingMasm()) {
1989 const std::pair<StringRef, StringRef> IDField =
1990 Tok.getString().split('.');
1991 const StringRef ID = IDField.first, Field = IDField.second;
1992 SMLoc IDEndLoc = SMLoc::getFromPointer(ID.data() + ID.size());
1993 if (!Field.empty() &&
1994 !MatchRegisterByName(Reg, ID, IdentLoc, IDEndLoc)) {
1995 if (SM.onRegister(Reg, ErrMsg))
1996 return Error(IdentLoc, ErrMsg);
1997
1998 AsmFieldInfo Info;
1999 SMLoc FieldStartLoc = SMLoc::getFromPointer(Field.data());
2000 if (Parser.lookUpField(Field, Info))
2001 return Error(FieldStartLoc, "unknown offset");
2002 else if (SM.onPlus(ErrMsg))
2003 return Error(getTok().getLoc(), ErrMsg);
2004 else if (SM.onInteger(Info.Offset, ErrMsg))
2005 return Error(IdentLoc, ErrMsg);
2006 SM.setTypeInfo(Info.Type);
2007
2008 End = consumeToken();
2009 break;
2010 }
2011 }
2012 }
2013 // Operator synonymous ("not", "or" etc.)
2014 bool ParseError = false;
2015 if (ParseIntelNamedOperator(Identifier, SM, ParseError, End)) {
2016 if (ParseError)
2017 return true;
2018 break;
2019 }
2020 if (Parser.isParsingMasm() &&
2021 ParseMasmNamedOperator(Identifier, SM, ParseError, End)) {
2022 if (ParseError)
2023 return true;
2024 break;
2025 }
2026 // Symbol reference, when parsing assembly content
2027 InlineAsmIdentifierInfo Info;
2028 AsmFieldInfo FieldInfo;
2029 const MCExpr *Val;
2030 if (isParsingMSInlineAsm() || Parser.isParsingMasm()) {
2031 // MS Dot Operator expression
2032 if (Identifier.count('.') &&
2033 (PrevTK == AsmToken::RBrac || PrevTK == AsmToken::RParen)) {
2034 if (ParseIntelDotOperator(SM, End))
2035 return true;
2036 break;
2037 }
2038 }
2039 if (isParsingMSInlineAsm()) {
2040 // MS InlineAsm operators (TYPE/LENGTH/SIZE)
2041 if (unsigned OpKind = IdentifyIntelInlineAsmOperator(Identifier)) {
2042 if (int64_t Val = ParseIntelInlineAsmOperator(OpKind)) {
2043 if (SM.onInteger(Val, ErrMsg))
2044 return Error(IdentLoc, ErrMsg);
2045 } else {
2046 return true;
2047 }
2048 break;
2049 }
2050 // MS InlineAsm identifier
2051 // Call parseIdentifier() to combine @ with the identifier behind it.
2052 if (TK == AsmToken::At && Parser.parseIdentifier(Identifier))
2053 return Error(IdentLoc, "expected identifier");
2054 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info, false, End))
2055 return true;
2056 else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2057 true, ErrMsg))
2058 return Error(IdentLoc, ErrMsg);
2059 break;
2060 }
2061 if (Parser.isParsingMasm()) {
2062 if (unsigned OpKind = IdentifyMasmOperator(Identifier)) {
2063 int64_t Val;
2064 if (ParseMasmOperator(OpKind, Val))
2065 return true;
2066 if (SM.onInteger(Val, ErrMsg))
2067 return Error(IdentLoc, ErrMsg);
2068 break;
2069 }
2070 if (!getParser().lookUpType(Identifier, FieldInfo.Type)) {
2071 // Field offset immediate; <TYPE>.<field specification>
2072 Lex(); // eat type
2073 bool EndDot = parseOptionalToken(AsmToken::Dot);
2074 while (EndDot || (getTok().is(AsmToken::Identifier) &&
2075 getTok().getString().startswith("."))) {
2076 getParser().parseIdentifier(Identifier);
2077 if (!EndDot)
2078 Identifier.consume_front(".");
2079 EndDot = Identifier.consume_back(".");
2080 if (getParser().lookUpField(FieldInfo.Type.Name, Identifier,
2081 FieldInfo)) {
2082 SMLoc IDEnd =
2083 SMLoc::getFromPointer(Identifier.data() + Identifier.size());
2084 return Error(IdentLoc, "Unable to lookup field reference!",
2085 SMRange(IdentLoc, IDEnd));
2086 }
2087 if (!EndDot)
2088 EndDot = parseOptionalToken(AsmToken::Dot);
2089 }
2090 if (SM.onInteger(FieldInfo.Offset, ErrMsg))
2091 return Error(IdentLoc, ErrMsg);
2092 break;
2093 }
2094 }
2095 if (getParser().parsePrimaryExpr(Val, End, &FieldInfo.Type)) {
2096 return Error(Tok.getLoc(), "Unexpected identifier!");
2097 } else if (SM.onIdentifierExpr(Val, Identifier, Info, FieldInfo.Type,
2098 false, ErrMsg)) {
2099 return Error(IdentLoc, ErrMsg);
2100 }
2101 break;
2102 }
2103 case AsmToken::Integer: {
2104 // Look for 'b' or 'f' following an Integer as a directional label
2105 SMLoc Loc = getTok().getLoc();
2106 int64_t IntVal = getTok().getIntVal();
2107 End = consumeToken();
2108 UpdateLocLex = false;
2109 if (getLexer().getKind() == AsmToken::Identifier) {
2110 StringRef IDVal = getTok().getString();
2111 if (IDVal == "f" || IDVal == "b") {
2112 MCSymbol *Sym =
2113 getContext().getDirectionalLocalSymbol(IntVal, IDVal == "b");
2114 MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
2115 const MCExpr *Val =
2116 MCSymbolRefExpr::create(Sym, Variant, getContext());
2117 if (IDVal == "b" && Sym->isUndefined())
2118 return Error(Loc, "invalid reference to undefined symbol");
2119 StringRef Identifier = Sym->getName();
2120 InlineAsmIdentifierInfo Info;
2121 AsmTypeInfo Type;
2122 if (SM.onIdentifierExpr(Val, Identifier, Info, Type,
2123 isParsingMSInlineAsm(), ErrMsg))
2124 return Error(Loc, ErrMsg);
2125 End = consumeToken();
2126 } else {
2127 if (SM.onInteger(IntVal, ErrMsg))
2128 return Error(Loc, ErrMsg);
2129 }
2130 } else {
2131 if (SM.onInteger(IntVal, ErrMsg))
2132 return Error(Loc, ErrMsg);
2133 }
2134 break;
2135 }
2136 case AsmToken::Plus:
2137 if (SM.onPlus(ErrMsg))
2138 return Error(getTok().getLoc(), ErrMsg);
2139 break;
2140 case AsmToken::Minus:
2141 if (SM.onMinus(ErrMsg))
2142 return Error(getTok().getLoc(), ErrMsg);
2143 break;
2144 case AsmToken::Tilde: SM.onNot(); break;
2145 case AsmToken::Star: SM.onStar(); break;
2146 case AsmToken::Slash: SM.onDivide(); break;
2147 case AsmToken::Percent: SM.onMod(); break;
2148 case AsmToken::Pipe: SM.onOr(); break;
2149 case AsmToken::Caret: SM.onXor(); break;
2150 case AsmToken::Amp: SM.onAnd(); break;
2151 case AsmToken::LessLess:
2152 SM.onLShift(); break;
2153 case AsmToken::GreaterGreater:
2154 SM.onRShift(); break;
2155 case AsmToken::LBrac:
2156 if (SM.onLBrac())
2157 return Error(Tok.getLoc(), "unexpected bracket encountered");
2158 tryParseOperandIdx(PrevTK, SM);
2159 break;
2160 case AsmToken::RBrac:
2161 if (SM.onRBrac(ErrMsg)) {
2162 return Error(Tok.getLoc(), ErrMsg);
2163 }
2164 break;
2165 case AsmToken::LParen: SM.onLParen(); break;
2166 case AsmToken::RParen: SM.onRParen(); break;
2167 }
2168 if (SM.hadError())
2169 return Error(Tok.getLoc(), "unknown token in expression");
2170
2171 if (!Done && UpdateLocLex)
2172 End = consumeToken();
2173
2174 PrevTK = TK;
2175 }
2176 return false;
2177 }
2178
void X86AsmParser::RewriteIntelExpression(IntelExprStateMachine &SM,
                                          SMLoc Start, SMLoc End) {
  // Record AsmRewrites so the MS inline-asm rewriter can replace the Intel
  // expression source text [Start, End) with a canonical form built from the
  // state machine's results (base/index registers, scale, immediate).
  SMLoc Loc = Start;
  unsigned ExprLen = End.getPointer() - Start.getPointer();
  // Skip everything before a symbol displacement (if we have one)
  if (SM.getSym() && !SM.isOffsetOperator()) {
    StringRef SymName = SM.getSymName();
    // Skip the source text preceding the symbol name, if any.
    if (unsigned Len = SymName.data() - Start.getPointer())
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, Start, Len);
    // The remaining rewrite region begins right after the symbol name.
    Loc = SMLoc::getFromPointer(SymName.data() + SymName.size());
    ExprLen = End.getPointer() - (SymName.data() + SymName.size());
    // If we have only a symbol then there's no need for complex rewrite,
    // simply skip everything after it
    if (!(SM.getBaseReg() || SM.getIndexReg() || SM.getImm())) {
      if (ExprLen)
        InstInfo->AsmRewrites->emplace_back(AOK_Skip, Loc, ExprLen);
      return;
    }
  }
  // Build an Intel Expression rewrite
  StringRef BaseRegStr;
  StringRef IndexRegStr;
  StringRef OffsetNameStr;
  if (SM.getBaseReg())
    BaseRegStr = X86IntelInstPrinter::getRegisterName(SM.getBaseReg());
  if (SM.getIndexReg())
    IndexRegStr = X86IntelInstPrinter::getRegisterName(SM.getIndexReg());
  if (SM.isOffsetOperator())
    OffsetNameStr = SM.getSymName();
  // Emit it
  IntelExpr Expr(BaseRegStr, IndexRegStr, SM.getScale(), OffsetNameStr,
                 SM.getImm(), SM.isMemExpr());
  InstInfo->AsmRewrites->emplace_back(Loc, ExprLen, Expr);
}
2213
2214 // Inline assembly may use variable names with namespace alias qualifiers.
bool X86AsmParser::ParseIntelInlineAsmIdentifier(
    const MCExpr *&Val, StringRef &Identifier, InlineAsmIdentifierInfo &Info,
    bool IsUnevaluatedOperand, SMLoc &End, bool IsParsingOffsetOperator) {
  // Resolve an identifier inside MS inline assembly through the frontend's
  // Sema callback, producing a symbol-reference expression in Val and
  // identifier metadata in Info. On success End points just past the
  // consumed identifier text.
  MCAsmParser &Parser = getParser();
  assert(isParsingMSInlineAsm() && "Expected to be parsing inline assembly.");
  Val = nullptr;

  // Let the frontend decide how much of the remaining line text forms the
  // identifier (it may span several assembler tokens, e.g. "A::B").
  StringRef LineBuf(Identifier.data());
  SemaCallback->LookupInlineAsmIdentifier(LineBuf, Info, IsUnevaluatedOperand);

  const AsmToken &Tok = Parser.getTok();
  SMLoc Loc = Tok.getLoc();

  // Advance the token stream until the end of the current token is
  // after the end of what the frontend claimed.
  const char *EndPtr = Tok.getLoc().getPointer() + LineBuf.size();
  do {
    End = Tok.getEndLoc();
    getLexer().Lex();
  } while (End.getPointer() < EndPtr);
  Identifier = LineBuf;

  // The frontend should end parsing on an assembler token boundary, unless it
  // failed parsing.
  assert((End.getPointer() == EndPtr ||
          Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) &&
         "frontend claimed part of a token?");

  // If the identifier lookup was unsuccessful, assume that we are dealing with
  // a label.
  if (Info.isKind(InlineAsmIdentifierInfo::IK_Invalid)) {
    StringRef InternalName =
        SemaCallback->LookupInlineAsmLabel(Identifier, getSourceManager(),
                                           Loc, false);
    assert(InternalName.size() && "We should have an internal name here.");
    // Push a rewrite for replacing the identifier name with the internal name,
    // unless we are parsing the operand of an offset operator
    if (!IsParsingOffsetOperator)
      InstInfo->AsmRewrites->emplace_back(AOK_Label, Loc, Identifier.size(),
                                          InternalName);
    else
      Identifier = InternalName;
  } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal))
    // Enum values are handled as integer constants by the caller; no symbol
    // reference is created for them.
    return false;
  // Create the symbol reference.
  MCSymbol *Sym = getContext().getOrCreateSymbol(Identifier);
  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
  Val = MCSymbolRefExpr::create(Sym, Variant, getParser().getContext());
  return false;
}
2265
// ParseRoundingModeOp - Parse an AVX-512 rounding mode operand.
ParseRoundingModeOp(SMLoc Start,OperandVector & Operands)2267 bool X86AsmParser::ParseRoundingModeOp(SMLoc Start, OperandVector &Operands) {
2268 MCAsmParser &Parser = getParser();
2269 const AsmToken &Tok = Parser.getTok();
2270 // Eat "{" and mark the current place.
2271 const SMLoc consumedToken = consumeToken();
2272 if (Tok.isNot(AsmToken::Identifier))
2273 return Error(Tok.getLoc(), "Expected an identifier after {");
2274 if (Tok.getIdentifier().startswith("r")){
2275 int rndMode = StringSwitch<int>(Tok.getIdentifier())
2276 .Case("rn", X86::STATIC_ROUNDING::TO_NEAREST_INT)
2277 .Case("rd", X86::STATIC_ROUNDING::TO_NEG_INF)
2278 .Case("ru", X86::STATIC_ROUNDING::TO_POS_INF)
2279 .Case("rz", X86::STATIC_ROUNDING::TO_ZERO)
2280 .Default(-1);
2281 if (-1 == rndMode)
2282 return Error(Tok.getLoc(), "Invalid rounding mode.");
2283 Parser.Lex(); // Eat "r*" of r*-sae
2284 if (!getLexer().is(AsmToken::Minus))
2285 return Error(Tok.getLoc(), "Expected - at this point");
2286 Parser.Lex(); // Eat "-"
2287 Parser.Lex(); // Eat the sae
2288 if (!getLexer().is(AsmToken::RCurly))
2289 return Error(Tok.getLoc(), "Expected } at this point");
2290 SMLoc End = Tok.getEndLoc();
2291 Parser.Lex(); // Eat "}"
2292 const MCExpr *RndModeOp =
2293 MCConstantExpr::create(rndMode, Parser.getContext());
2294 Operands.push_back(X86Operand::CreateImm(RndModeOp, Start, End));
2295 return false;
2296 }
2297 if(Tok.getIdentifier().equals("sae")){
2298 Parser.Lex(); // Eat the sae
2299 if (!getLexer().is(AsmToken::RCurly))
2300 return Error(Tok.getLoc(), "Expected } at this point");
2301 Parser.Lex(); // Eat "}"
2302 Operands.push_back(X86Operand::CreateToken("{sae}", consumedToken));
2303 return false;
2304 }
2305 return Error(Tok.getLoc(), "unknown token in expression");
2306 }
2307
2308 /// Parse the '.' operator.
bool X86AsmParser::ParseIntelDotOperator(IntelExprStateMachine &SM,
                                         SMLoc &End) {
  // Parse a '.'-joined field reference (e.g. ".bar" or "foo.bar.baz"),
  // resolve it to a byte offset, and fold that offset into SM's immediate.
  // On success End is updated past the consumed dot-expression text.
  const AsmToken &Tok = getTok();
  AsmFieldInfo Info;

  // Drop the optional '.'.
  StringRef DotDispStr = Tok.getString();
  if (DotDispStr.startswith("."))
    DotDispStr = DotDispStr.drop_front(1);
  StringRef TrailingDot;

  // .Imm gets lexed as a real.
  if (Tok.is(AsmToken::Real)) {
    APInt DotDisp;
    DotDispStr.getAsInteger(10, DotDisp);
    Info.Offset = DotDisp.getZExtValue();
  } else if ((isParsingMSInlineAsm() || getParser().isParsingMasm()) &&
             Tok.is(AsmToken::Identifier)) {
    // A trailing '.' belongs to the next expression component; strip it here
    // and push it back onto the lexer after the field lookup.
    if (DotDispStr.endswith(".")) {
      TrailingDot = DotDispStr.substr(DotDispStr.size() - 1);
      DotDispStr = DotDispStr.drop_back(1);
    }
    const std::pair<StringRef, StringRef> BaseMember = DotDispStr.split('.');
    const StringRef Base = BaseMember.first, Member = BaseMember.second;
    // Try, in order: a field of the expression's current type, a field of
    // the symbol's type, a bare field reference, and finally the frontend's
    // inline-asm field lookup. Only if all fail is it an error.
    if (getParser().lookUpField(SM.getType(), DotDispStr, Info) &&
        getParser().lookUpField(SM.getSymName(), DotDispStr, Info) &&
        getParser().lookUpField(DotDispStr, Info) &&
        (!SemaCallback ||
         SemaCallback->LookupInlineAsmField(Base, Member, Info.Offset)))
      return Error(Tok.getLoc(), "Unable to lookup field reference!");
  } else {
    return Error(Tok.getLoc(), "Unexpected token type!");
  }

  // Eat the DotExpression and update End
  End = SMLoc::getFromPointer(DotDispStr.data());
  const char *DotExprEndLoc = DotDispStr.data() + DotDispStr.size();
  while (Tok.getLoc().getPointer() < DotExprEndLoc)
    Lex();
  // Re-queue the stripped trailing dot so the caller sees it next.
  if (!TrailingDot.empty())
    getLexer().UnLex(AsmToken(AsmToken::Dot, TrailingDot));
  SM.addImm(Info.Offset);
  SM.setTypeInfo(Info.Type);
  return false;
}
2354
2355 /// Parse the 'offset' operator.
2356 /// This operator is used to specify the location of a given operand
ParseIntelOffsetOperator(const MCExpr * & Val,StringRef & ID,InlineAsmIdentifierInfo & Info,SMLoc & End)2357 bool X86AsmParser::ParseIntelOffsetOperator(const MCExpr *&Val, StringRef &ID,
2358 InlineAsmIdentifierInfo &Info,
2359 SMLoc &End) {
2360 // Eat offset, mark start of identifier.
2361 SMLoc Start = Lex().getLoc();
2362 ID = getTok().getString();
2363 if (!isParsingMSInlineAsm()) {
2364 if ((getTok().isNot(AsmToken::Identifier) &&
2365 getTok().isNot(AsmToken::String)) ||
2366 getParser().parsePrimaryExpr(Val, End, nullptr))
2367 return Error(Start, "unexpected token!");
2368 } else if (ParseIntelInlineAsmIdentifier(Val, ID, Info, false, End, true)) {
2369 return Error(Start, "unable to lookup expression");
2370 } else if (Info.isKind(InlineAsmIdentifierInfo::IK_EnumVal)) {
2371 return Error(Start, "offset operator cannot yet handle constants");
2372 }
2373 return false;
2374 }
2375
// Query a candidate string for being an Intel assembly operator.
// Report back its kind, or IOK_INVALID if it does not evaluate to a known one.
IdentifyIntelInlineAsmOperator(StringRef Name)2378 unsigned X86AsmParser::IdentifyIntelInlineAsmOperator(StringRef Name) {
2379 return StringSwitch<unsigned>(Name)
2380 .Cases("TYPE","type",IOK_TYPE)
2381 .Cases("SIZE","size",IOK_SIZE)
2382 .Cases("LENGTH","length",IOK_LENGTH)
2383 .Default(IOK_INVALID);
2384 }
2385
2386 /// Parse the 'LENGTH', 'TYPE' and 'SIZE' operators. The LENGTH operator
2387 /// returns the number of elements in an array. It returns the value 1 for
2388 /// non-array variables. The SIZE operator returns the size of a C or C++
2389 /// variable. A variable's size is the product of its LENGTH and TYPE. The
2390 /// TYPE operator returns the size of a C or C++ type or variable. If the
2391 /// variable is an array, TYPE returns the size of a single element.
ParseIntelInlineAsmOperator(unsigned OpKind)2392 unsigned X86AsmParser::ParseIntelInlineAsmOperator(unsigned OpKind) {
2393 MCAsmParser &Parser = getParser();
2394 const AsmToken &Tok = Parser.getTok();
2395 Parser.Lex(); // Eat operator.
2396
2397 const MCExpr *Val = nullptr;
2398 InlineAsmIdentifierInfo Info;
2399 SMLoc Start = Tok.getLoc(), End;
2400 StringRef Identifier = Tok.getString();
2401 if (ParseIntelInlineAsmIdentifier(Val, Identifier, Info,
2402 /*IsUnevaluatedOperand=*/true, End))
2403 return 0;
2404
2405 if (!Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
2406 Error(Start, "unable to lookup expression");
2407 return 0;
2408 }
2409
2410 unsigned CVal = 0;
2411 switch(OpKind) {
2412 default: llvm_unreachable("Unexpected operand kind!");
2413 case IOK_LENGTH: CVal = Info.Var.Length; break;
2414 case IOK_SIZE: CVal = Info.Var.Size; break;
2415 case IOK_TYPE: CVal = Info.Var.Type; break;
2416 }
2417
2418 return CVal;
2419 }
2420
// Query a candidate string for being a MASM assembly operator.
// Report back its kind, or MOK_INVALID if it does not evaluate to a known one.
IdentifyMasmOperator(StringRef Name)2423 unsigned X86AsmParser::IdentifyMasmOperator(StringRef Name) {
2424 return StringSwitch<unsigned>(Name.lower())
2425 .Case("type", MOK_TYPE)
2426 .Cases("size", "sizeof", MOK_SIZEOF)
2427 .Cases("length", "lengthof", MOK_LENGTHOF)
2428 .Default(MOK_INVALID);
2429 }
2430
2431 /// Parse the 'LENGTHOF', 'SIZEOF', and 'TYPE' operators. The LENGTHOF operator
2432 /// returns the number of elements in an array. It returns the value 1 for
2433 /// non-array variables. The SIZEOF operator returns the size of a type or
2434 /// variable in bytes. A variable's size is the product of its LENGTH and TYPE.
2435 /// The TYPE operator returns the size of a variable. If the variable is an
2436 /// array, TYPE returns the size of a single element.
bool X86AsmParser::ParseMasmOperator(unsigned OpKind, int64_t &Val) {
  // Evaluate a MASM LENGTHOF/SIZEOF/TYPE operator into Val.
  // Returns true on a (reported) error.
  MCAsmParser &Parser = getParser();
  SMLoc OpLoc = Parser.getTok().getLoc();
  Parser.Lex(); // Eat operator.

  Val = 0;
  if (OpKind == MOK_SIZEOF || OpKind == MOK_TYPE) {
    // Check for SIZEOF(<type>) and TYPE(<type>).
    bool InParens = Parser.getTok().is(AsmToken::LParen);
    // Peek past the '(' (if any) to see whether the operand is a bare type
    // name; only consume tokens once the lookup succeeds.
    const AsmToken &IDTok = InParens ? getLexer().peekTok() : Parser.getTok();
    AsmTypeInfo Type;
    if (IDTok.is(AsmToken::Identifier) &&
        !Parser.lookUpType(IDTok.getIdentifier(), Type)) {
      Val = Type.Size;

      // Eat tokens.
      if (InParens)
        parseToken(AsmToken::LParen);
      parseToken(AsmToken::Identifier);
      if (InParens)
        parseToken(AsmToken::RParen);
    }
  }

  if (!Val) {
    // The operand was not a bare type name; evaluate it as a full Intel
    // expression and query the requested property from the state machine.
    IntelExprStateMachine SM;
    SMLoc End, Start = Parser.getTok().getLoc();
    if (ParseIntelExpression(SM, End))
      return true;

    switch (OpKind) {
    default:
      llvm_unreachable("Unexpected operand kind!");
    case MOK_SIZEOF:
      Val = SM.getSize();
      break;
    case MOK_LENGTHOF:
      Val = SM.getLength();
      break;
    case MOK_TYPE:
      Val = SM.getElementSize();
      break;
    }

    // A zero result means the expression carried no type information.
    if (!Val)
      return Error(OpLoc, "expression has unknown type", SMRange(Start, End));
  }

  return false;
}
2487
ParseIntelMemoryOperandSize(unsigned & Size)2488 bool X86AsmParser::ParseIntelMemoryOperandSize(unsigned &Size) {
2489 Size = StringSwitch<unsigned>(getTok().getString())
2490 .Cases("BYTE", "byte", 8)
2491 .Cases("WORD", "word", 16)
2492 .Cases("DWORD", "dword", 32)
2493 .Cases("FLOAT", "float", 32)
2494 .Cases("LONG", "long", 32)
2495 .Cases("FWORD", "fword", 48)
2496 .Cases("DOUBLE", "double", 64)
2497 .Cases("QWORD", "qword", 64)
2498 .Cases("MMWORD","mmword", 64)
2499 .Cases("XWORD", "xword", 80)
2500 .Cases("TBYTE", "tbyte", 80)
2501 .Cases("XMMWORD", "xmmword", 128)
2502 .Cases("YMMWORD", "ymmword", 256)
2503 .Cases("ZMMWORD", "zmmword", 512)
2504 .Default(0);
2505 if (Size) {
2506 const AsmToken &Tok = Lex(); // Eat operand size (e.g., byte, word).
2507 if (!(Tok.getString().equals("PTR") || Tok.getString().equals("ptr")))
2508 return Error(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!");
2509 Lex(); // Eat ptr.
2510 }
2511 return false;
2512 }
2513
bool X86AsmParser::parseIntelOperand(OperandVector &Operands, StringRef Name) {
  // Parse one Intel-syntax operand (register, immediate, rounding mode, or
  // memory reference) and append it to Operands. Name is the instruction
  // mnemonic, used for MASM branch-target heuristics. Returns true on a
  // (reported) error.
  MCAsmParser &Parser = getParser();
  const AsmToken &Tok = Parser.getTok();
  SMLoc Start, End;

  // Parse optional Size directive.
  unsigned Size;
  if (ParseIntelMemoryOperandSize(Size))
    return true;
  bool PtrInOperand = bool(Size);

  Start = Tok.getLoc();

  // Rounding mode operand.
  if (getLexer().is(AsmToken::LCurly))
    return ParseRoundingModeOp(Start, Operands);

  // Register operand.
  MCRegister RegNo;
  if (Tok.is(AsmToken::Identifier) && !parseRegister(RegNo, Start, End)) {
    if (RegNo == X86::RIP)
      return Error(Start, "rip can only be used as a base register");
    // A Register followed by ':' is considered a segment override
    if (Tok.isNot(AsmToken::Colon)) {
      if (PtrInOperand)
        return Error(Start, "expected memory operand after 'ptr', "
                            "found register operand instead");
      Operands.push_back(X86Operand::CreateReg(RegNo, Start, End));
      return false;
    }
    // An alleged segment override. check if we have a valid segment register
    if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo))
      return Error(Start, "invalid segment register");
    // Eat ':' and update Start location
    Start = Lex().getLoc();
  }

  // Immediates and Memory
  IntelExprStateMachine SM;
  if (ParseIntelExpression(SM, End))
    return true;

  if (isParsingMSInlineAsm())
    RewriteIntelExpression(SM, Start, Tok.getLoc());

  // Fold the symbolic displacement (if any) and the accumulated immediate
  // into a single displacement expression.
  int64_t Imm = SM.getImm();
  const MCExpr *Disp = SM.getSym();
  const MCExpr *ImmDisp = MCConstantExpr::create(Imm, getContext());
  if (Disp && Imm)
    Disp = MCBinaryExpr::createAdd(Disp, ImmDisp, getContext());
  if (!Disp)
    Disp = ImmDisp;

  // RegNo != 0 specifies a valid segment register,
  // and we are parsing a segment override
  if (!SM.isMemExpr() && !RegNo) {
    // Pure immediate (no memory expression, no segment override).
    if (isParsingMSInlineAsm() && SM.isOffsetOperator()) {
      const InlineAsmIdentifierInfo &Info = SM.getIdentifierInfo();
      if (Info.isKind(InlineAsmIdentifierInfo::IK_Var)) {
        // Disp includes the address of a variable; make sure this is recorded
        // for later handling.
        Operands.push_back(X86Operand::CreateImm(Disp, Start, End,
                                                 SM.getSymName(), Info.Var.Decl,
                                                 Info.Var.IsGlobalLV));
        return false;
      }
    }

    Operands.push_back(X86Operand::CreateImm(Disp, Start, End));
    return false;
  }

  StringRef ErrMsg;
  unsigned BaseReg = SM.getBaseReg();
  unsigned IndexReg = SM.getIndexReg();
  // RIP-relative addressing cannot carry an index register; drop RIP as the
  // base when an index was also parsed.
  if (IndexReg && BaseReg == X86::RIP)
    BaseReg = 0;
  unsigned Scale = SM.getScale();
  // Without an explicit 'ptr' directive, derive the access size from the
  // expression's element type (bytes -> bits).
  if (!PtrInOperand)
    Size = SM.getElementSize() << 3;

  // ESP/RSP cannot be index registers; swap them into the base position
  // when no explicit scale forbids it.
  if (Scale == 0 && BaseReg != X86::ESP && BaseReg != X86::RSP &&
      (IndexReg == X86::ESP || IndexReg == X86::RSP))
    std::swap(BaseReg, IndexReg);

  // If BaseReg is a vector register and IndexReg is not, swap them unless
  // Scale was specified in which case it would be an error.
  if (Scale == 0 &&
      !(X86MCRegisterClasses[X86::VR128XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR256XRegClassID].contains(IndexReg) ||
        X86MCRegisterClasses[X86::VR512RegClassID].contains(IndexReg)) &&
      (X86MCRegisterClasses[X86::VR128XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR256XRegClassID].contains(BaseReg) ||
       X86MCRegisterClasses[X86::VR512RegClassID].contains(BaseReg)))
    std::swap(BaseReg, IndexReg);

  if (Scale != 0 &&
      X86MCRegisterClasses[X86::GR16RegClassID].contains(IndexReg))
    return Error(Start, "16-bit addresses cannot have a scale");

  // If there was no explicit scale specified, change it to 1.
  if (Scale == 0)
    Scale = 1;

  // If this is a 16-bit addressing mode with the base and index in the wrong
  // order, swap them so CheckBaseRegAndIndexRegAndScale doesn't fail. It is
  // shared with att syntax where order matters.
  if ((BaseReg == X86::SI || BaseReg == X86::DI) &&
      (IndexReg == X86::BX || IndexReg == X86::BP))
    std::swap(BaseReg, IndexReg);

  if ((BaseReg || IndexReg) &&
      CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(Start, ErrMsg);
  if (isParsingMSInlineAsm())
    return CreateMemForMSInlineAsm(RegNo, Disp, BaseReg, IndexReg, Scale, Start,
                                   End, Size, SM.getSymName(),
                                   SM.getIdentifierInfo(), Operands);

  // When parsing x64 MS-style assembly, all non-absolute references to a named
  // variable default to RIP-relative.
  unsigned DefaultBaseReg = X86::NoRegister;
  bool MaybeDirectBranchDest = true;

  if (Parser.isParsingMasm()) {
    bool IsUnconditionalBranch =
        Name.equals_insensitive("jmp") || Name.equals_insensitive("call");
    if (is64BitMode() && SM.getElementSize() > 0) {
      DefaultBaseReg = X86::RIP;
    }
    if (IsUnconditionalBranch) {
      if (PtrInOperand) {
        // 'jmp/call <size> ptr <expr>' is always an indirect branch.
        MaybeDirectBranchDest = false;
        if (is64BitMode())
          DefaultBaseReg = X86::RIP;
      } else if (!BaseReg && !IndexReg && Disp &&
                 Disp->getKind() == MCExpr::SymbolRef) {
        // A sized symbolic reference denotes a branch through memory.
        if (is64BitMode()) {
          if (SM.getSize() == 8) {
            MaybeDirectBranchDest = false;
            DefaultBaseReg = X86::RIP;
          }
        } else {
          if (SM.getSize() == 4 || SM.getSize() == 2)
            MaybeDirectBranchDest = false;
        }
      }
    }
  }

  // Emit the memory operand, with registers if any were involved.
  if ((BaseReg || IndexReg || RegNo || DefaultBaseReg != X86::NoRegister))
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), RegNo, Disp, BaseReg, IndexReg, Scale, Start, End,
        Size, DefaultBaseReg, /*SymName=*/StringRef(), /*OpDecl=*/nullptr,
        /*FrontendSize=*/0, /*UseUpRegs=*/false, MaybeDirectBranchDest));
  else
    Operands.push_back(X86Operand::CreateMem(
        getPointerWidth(), Disp, Start, End, Size, /*SymName=*/StringRef(),
        /*OpDecl=*/nullptr, /*FrontendSize=*/0, /*UseUpRegs=*/false,
        MaybeDirectBranchDest));
  return false;
}
2677
bool X86AsmParser::parseATTOperand(OperandVector &Operands) {
  // Parse one AT&T-syntax operand (immediate, rounding-mode mark, register,
  // or memory reference) and append it to Operands. Returns true on a
  // (reported) error.
  MCAsmParser &Parser = getParser();
  switch (getLexer().getKind()) {
  case AsmToken::Dollar: {
    // $42 or $ID -> immediate.
    SMLoc Start = Parser.getTok().getLoc(), End;
    Parser.Lex();
    const MCExpr *Val;
    // This is an immediate, so we should not parse a register. Do a precheck
    // for '%' to supercede intra-register parse errors.
    SMLoc L = Parser.getTok().getLoc();
    if (check(getLexer().is(AsmToken::Percent), L,
              "expected immediate expression") ||
        getParser().parseExpression(Val, End) ||
        check(isa<X86MCExpr>(Val), L, "expected immediate expression"))
      return true;
    Operands.push_back(X86Operand::CreateImm(Val, Start, End));
    return false;
  }
  case AsmToken::LCurly: {
    // AVX-512 rounding-control mark, e.g. {rn-sae} or {sae}.
    SMLoc Start = Parser.getTok().getLoc();
    return ParseRoundingModeOp(Start, Operands);
  }
  default: {
    // This a memory operand or a register. We have some parsing complications
    // as a '(' may be part of an immediate expression or the addressing mode
    // block. This is complicated by the fact that an assembler-level variable
    // may refer either to a register or an immediate expression.

    SMLoc Loc = Parser.getTok().getLoc(), EndLoc;
    const MCExpr *Expr = nullptr;
    unsigned Reg = 0;
    if (getLexer().isNot(AsmToken::LParen)) {
      // No '(' so this is either a displacement expression or a register.
      if (Parser.parseExpression(Expr, EndLoc))
        return true;
      if (auto *RE = dyn_cast<X86MCExpr>(Expr)) {
        // Segment Register. Reset Expr and copy value to register.
        Expr = nullptr;
        Reg = RE->getRegNo();

        // Check the register.
        if (Reg == X86::EIZ || Reg == X86::RIZ)
          return Error(
              Loc, "%eiz and %riz can only be used as index registers",
              SMRange(Loc, EndLoc));
        if (Reg == X86::RIP)
          return Error(Loc, "%rip can only be used as a base register",
                       SMRange(Loc, EndLoc));
        // Return register that are not segment prefixes immediately.
        if (!Parser.parseOptionalToken(AsmToken::Colon)) {
          Operands.push_back(X86Operand::CreateReg(Reg, Loc, EndLoc));
          return false;
        }
        if (!X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(Reg))
          return Error(Loc, "invalid segment register");
        // Accept a '*' absolute memory reference after the segment. Place it
        // before the full memory operand.
        if (getLexer().is(AsmToken::Star))
          Operands.push_back(X86Operand::CreateToken("*", consumeToken()));
      }
    }
    // This is a Memory operand.
    return ParseMemOperand(Reg, Expr, Loc, EndLoc, Operands);
  }
  }
}
2745
2746 // X86::COND_INVALID if not a recognized condition code or alternate mnemonic,
2747 // otherwise the EFLAGS Condition Code enumerator.
ParseConditionCode(StringRef CC)2748 X86::CondCode X86AsmParser::ParseConditionCode(StringRef CC) {
2749 return StringSwitch<X86::CondCode>(CC)
2750 .Case("o", X86::COND_O) // Overflow
2751 .Case("no", X86::COND_NO) // No Overflow
2752 .Cases("b", "nae", X86::COND_B) // Below/Neither Above nor Equal
2753 .Cases("ae", "nb", X86::COND_AE) // Above or Equal/Not Below
2754 .Cases("e", "z", X86::COND_E) // Equal/Zero
2755 .Cases("ne", "nz", X86::COND_NE) // Not Equal/Not Zero
2756 .Cases("be", "na", X86::COND_BE) // Below or Equal/Not Above
2757 .Cases("a", "nbe", X86::COND_A) // Above/Neither Below nor Equal
2758 .Case("s", X86::COND_S) // Sign
2759 .Case("ns", X86::COND_NS) // No Sign
2760 .Cases("p", "pe", X86::COND_P) // Parity/Parity Even
2761 .Cases("np", "po", X86::COND_NP) // No Parity/Parity Odd
2762 .Cases("l", "nge", X86::COND_L) // Less/Neither Greater nor Equal
2763 .Cases("ge", "nl", X86::COND_GE) // Greater or Equal/Not Less
2764 .Cases("le", "ng", X86::COND_LE) // Less or Equal/Not Greater
2765 .Cases("g", "nle", X86::COND_G) // Greater/Neither Less nor Equal
2766 .Default(X86::COND_INVALID);
2767 }
2768
2769 // true on failure, false otherwise
2770 // If no {z} mark was found - Parser doesn't advance
ParseZ(std::unique_ptr<X86Operand> & Z,const SMLoc & StartLoc)2771 bool X86AsmParser::ParseZ(std::unique_ptr<X86Operand> &Z,
2772 const SMLoc &StartLoc) {
2773 MCAsmParser &Parser = getParser();
2774 // Assuming we are just pass the '{' mark, quering the next token
2775 // Searched for {z}, but none was found. Return false, as no parsing error was
2776 // encountered
2777 if (!(getLexer().is(AsmToken::Identifier) &&
2778 (getLexer().getTok().getIdentifier() == "z")))
2779 return false;
2780 Parser.Lex(); // Eat z
2781 // Query and eat the '}' mark
2782 if (!getLexer().is(AsmToken::RCurly))
2783 return Error(getLexer().getLoc(), "Expected } at this point");
2784 Parser.Lex(); // Eat '}'
2785 // Assign Z with the {z} mark operand
2786 Z = X86Operand::CreateToken("{z}", StartLoc);
2787 return false;
2788 }
2789
// Parse the AVX512-specific decorations that may trail a regular operand:
// either a memory-broadcast specifier ({1to<NUM>}) or a write-mask/zeroing
// combination ({%k<NUM>}, {z}, {%k<NUM>}{z} or {z}{%k<NUM>}).
// Recognized pieces are appended to Operands as separate token/register
// operands. Returns true on failure, false otherwise. When the next token
// is not '{' this is a successful no-op.
bool X86AsmParser::HandleAVX512Operand(OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  if (getLexer().is(AsmToken::LCurly)) {
    // Eat "{" and mark the current place.
    const SMLoc consumedToken = consumeToken();
    // Distinguish {1to<NUM>} from {%k<NUM>}: a broadcast starts with an
    // integer token ('1'), a write mask starts with a register.
    if(getLexer().is(AsmToken::Integer)) {
      // Parse memory broadcasting ({1to<NUM>}).
      if (getLexer().getTok().getIntVal() != 1)
        return TokError("Expected 1to<NUM> at this point");
      StringRef Prefix = getLexer().getTok().getString();
      Parser.Lex(); // Eat first token of 1to8
      if (!getLexer().is(AsmToken::Identifier))
        return TokError("Expected 1to<NUM> at this point");
      // Recognize only reasonable suffixes.
      // The lexer splits "1to8" into the integer "1" and the identifier
      // "to8"; glue them back together before matching.
      SmallVector<char, 5> BroadcastVector;
      StringRef BroadcastString = (Prefix + getLexer().getTok().getIdentifier())
                                      .toStringRef(BroadcastVector);
      if (!BroadcastString.startswith("1to"))
        return TokError("Expected 1to<NUM> at this point");
      const char *BroadcastPrimitive =
          StringSwitch<const char *>(BroadcastString)
              .Case("1to2", "{1to2}")
              .Case("1to4", "{1to4}")
              .Case("1to8", "{1to8}")
              .Case("1to16", "{1to16}")
              .Case("1to32", "{1to32}")
              .Default(nullptr);
      if (!BroadcastPrimitive)
        return TokError("Invalid memory broadcast primitive.");
      Parser.Lex(); // Eat trailing token of 1toN
      if (!getLexer().is(AsmToken::RCurly))
        return TokError("Expected } at this point");
      Parser.Lex();  // Eat "}"
      Operands.push_back(X86Operand::CreateToken(BroadcastPrimitive,
                                                 consumedToken));
      // No AVX512 specific primitives can pass
      // after memory broadcasting, so return.
      return false;
    } else {
      // Parse either {k}{z}, {z}{k}, {k} or {z}.
      // The last form has no meaning on its own, but GCC accepts it.
      // At this point we have just consumed a '{' mark.
      std::unique_ptr<X86Operand> Z;
      if (ParseZ(Z, consumedToken))
        return true;
      // Reaching here means that parsing of the allegedly '{z}' mark yielded
      // no errors.
      // Query for the need of further parsing for a {%k<NUM>} mark:
      // either no {z} was found (so this '{' must open a mask), or a {z} was
      // found and another '{' follows it.
      if (!Z || getLexer().is(AsmToken::LCurly)) {
        SMLoc StartLoc = Z ? consumeToken() : consumedToken;
        // Parse an op-mask register mark ({%k<NUM>}), which is now to be
        // expected
        MCRegister RegNo;
        SMLoc RegLoc;
        if (!parseRegister(RegNo, RegLoc, StartLoc) &&
            X86MCRegisterClasses[X86::VK1RegClassID].contains(RegNo)) {
          // k0 is the implicit "no masking" encoding and may not be written.
          if (RegNo == X86::K0)
            return Error(RegLoc, "Register k0 can't be used as write mask");
          if (!getLexer().is(AsmToken::RCurly))
            return Error(getLexer().getLoc(), "Expected } at this point");
          Operands.push_back(X86Operand::CreateToken("{", StartLoc));
          Operands.push_back(
              X86Operand::CreateReg(RegNo, StartLoc, StartLoc));
          Operands.push_back(X86Operand::CreateToken("}", consumeToken()));
        } else
          return Error(getLexer().getLoc(),
                       "Expected an op-mask register at this point");
        // {%k<NUM>} mark is found, inquire for {z}
        if (getLexer().is(AsmToken::LCurly) && !Z) {
          // Either a parsing error occurred, or the expected {z} mark was
          // missing - report an error in both cases.
          if (ParseZ(Z, consumeToken()) || !Z)
            return Error(getLexer().getLoc(),
                         "Expected a {z} mark at this point");

        }
        // '{z}' on its own is meaningless, hence should be ignored.
        // On the contrary - had it been accompanied by a K register,
        // allow it.
        if (Z)
          Operands.push_back(std::move(Z));
      }
    }
  }
  return false;
}
2878
/// ParseMemOperand: 'seg : disp(basereg, indexreg, scale)'. The '%ds:' prefix
/// has already been parsed if present. disp may be provided as well.
/// Parses the rest of an AT&T-syntax memory operand and appends the resulting
/// operand to Operands. SegReg is 0 when no segment override was seen; Disp is
/// null when no displacement has been parsed yet. Returns true on error.
bool X86AsmParser::ParseMemOperand(unsigned SegReg, const MCExpr *Disp,
                                   SMLoc StartLoc, SMLoc EndLoc,
                                   OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  SMLoc Loc;
  // Based on the initial passed values, we may be in any of these cases, we are
  // in one of these cases (with current position (*)):

  //   1. seg : * disp  (base-index-scale-expr)
  //   2. seg : *(disp) (base-index-scale-expr)
  //   3. seg :       *(base-index-scale-expr)
  //   4.        disp  *(base-index-scale-expr)
  //   5.            *(disp)  (base-index-scale-expr)
  //   6.                 *(base-index-scale-expr)
  //   7.  disp *
  //   8. *(disp)

  // If we do not have a displacement yet, check if we're in cases 4 or 6 by
  // checking if the first object after the parenthesis is a register (or an
  // identifier referring to a register) and parse the displacement or default
  // to 0 as appropriate.
  auto isAtMemOperand = [this]() {
    if (this->getLexer().isNot(AsmToken::LParen))
      return false;
    AsmToken Buf[2];
    StringRef Id;
    // Peek without consuming so a plain expression can still be parsed below.
    auto TokCount = this->getLexer().peekTokens(Buf, true);
    if (TokCount == 0)
      return false;
    switch (Buf[0].getKind()) {
    case AsmToken::Percent:
    case AsmToken::Comma:
      // '(%' starts a register; '(,' starts an index-only form. Either way
      // this is a base-index-scale expression.
      return true;
    // These lower cases are doing a peekIdentifier.
    case AsmToken::At:
    case AsmToken::Dollar:
      // Only treat '@id'/'$id' as one identifier when the two tokens are
      // adjacent in the source buffer (no whitespace in between).
      if ((TokCount > 1) &&
          (Buf[1].is(AsmToken::Identifier) || Buf[1].is(AsmToken::String)) &&
          (Buf[0].getLoc().getPointer() + 1 == Buf[1].getLoc().getPointer()))
        Id = StringRef(Buf[0].getLoc().getPointer(),
                       Buf[1].getIdentifier().size() + 1);
      break;
    case AsmToken::Identifier:
    case AsmToken::String:
      Id = Buf[0].getIdentifier();
      break;
    default:
      return false;
    }
    // We have an ID. Check if it is bound to a register.
    if (!Id.empty()) {
      MCSymbol *Sym = this->getContext().getOrCreateSymbol(Id);
      if (Sym->isVariable()) {
        auto V = Sym->getVariableValue(/*SetUsed*/ false);
        return isa<X86MCExpr>(V);
      }
    }
    return false;
  };

  if (!Disp) {
    // Parse immediate if we're not at a mem operand yet.
    if (!isAtMemOperand()) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(Disp, EndLoc))
        return true;
      assert(!isa<X86MCExpr>(Disp) && "Expected non-register here.");
    } else {
      // Disp is implicitly zero if we haven't parsed it yet.
      Disp = MCConstantExpr::create(0, Parser.getContext());
    }
  }

  // We are now either at the end of the operand or at the '(' at the start of a
  // base-index-scale-expr.

  if (!parseOptionalToken(AsmToken::LParen)) {
    // Pure displacement (cases 7 and 8): emit a mem operand with no base or
    // index, with the segment override if one was given.
    if (SegReg == 0)
      Operands.push_back(
          X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
    else
      Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                               0, 0, 1, StartLoc, EndLoc));
    return false;
  }

  // If we reached here, then eat the '(' and Process
  // the rest of the memory operand.
  unsigned BaseReg = 0, IndexReg = 0, Scale = 1;
  SMLoc BaseLoc = getLexer().getLoc();
  const MCExpr *E;
  StringRef ErrMsg;

  // Parse BaseReg if one is provided.
  if (getLexer().isNot(AsmToken::Comma) && getLexer().isNot(AsmToken::RParen)) {
    if (Parser.parseExpression(E, EndLoc) ||
        check(!isa<X86MCExpr>(E), BaseLoc, "expected register here"))
      return true;

    // Check the register.
    BaseReg = cast<X86MCExpr>(E)->getRegNo();
    if (BaseReg == X86::EIZ || BaseReg == X86::RIZ)
      return Error(BaseLoc, "eiz and riz can only be used as index registers",
                   SMRange(BaseLoc, EndLoc));
  }

  if (parseOptionalToken(AsmToken::Comma)) {
    // Following the comma we should have either an index register, or a scale
    // value. We don't support the later form, but we want to parse it
    // correctly.
    //
    // Even though it would be completely consistent to support syntax like
    // "1(%eax,,1)", the assembler doesn't. Use "eiz" or "riz" for this.
    if (getLexer().isNot(AsmToken::RParen)) {
      if (Parser.parseTokenLoc(Loc) || Parser.parseExpression(E, EndLoc))
        return true;

      if (!isa<X86MCExpr>(E)) {
        // We've parsed an unexpected Scale Value instead of an index
        // register. Interpret it as an absolute.
        int64_t ScaleVal;
        if (!E->evaluateAsAbsolute(ScaleVal, getStreamer().getAssemblerPtr()))
          return Error(Loc, "expected absolute expression");
        if (ScaleVal != 1)
          Warning(Loc, "scale factor without index register is ignored");
        Scale = 1;
      } else { // IndexReg Found.
        IndexReg = cast<X86MCExpr>(E)->getRegNo();

        // %rip cannot be combined with an index in either position.
        if (BaseReg == X86::RIP)
          return Error(Loc,
                       "%rip as base register can not have an index register");
        if (IndexReg == X86::RIP)
          return Error(Loc, "%rip is not allowed as an index register");

        if (parseOptionalToken(AsmToken::Comma)) {
          // Parse the scale amount:
          //  ::= ',' [scale-expression]

          // A scale amount without an index is ignored.
          if (getLexer().isNot(AsmToken::RParen)) {
            int64_t ScaleVal;
            if (Parser.parseTokenLoc(Loc) ||
                Parser.parseAbsoluteExpression(ScaleVal))
              return Error(Loc, "expected scale expression");
            Scale = (unsigned)ScaleVal;
            // Validate the scale amount.
            if (X86MCRegisterClasses[X86::GR16RegClassID].contains(BaseReg) &&
                Scale != 1)
              return Error(Loc, "scale factor in 16-bit address must be 1");
            if (checkScale(Scale, ErrMsg))
              return Error(Loc, ErrMsg);
          }
        }
      }
    }
  }

  // Ok, we've eaten the memory operand, verify we have a ')' and eat it too.
  if (parseToken(AsmToken::RParen, "unexpected token in memory operand"))
    return true;

  // This is to support otherwise illegal operand (%dx) found in various
  // unofficial manuals examples (e.g. "out[s]?[bwl]? %al, (%dx)") and must now
  // be supported. Mark such DX variants separately; they are fixed up only in
  // the special cases that accept them (the in/out family).
  if (BaseReg == X86::DX && IndexReg == 0 && Scale == 1 && SegReg == 0 &&
      isa<MCConstantExpr>(Disp) &&
      cast<MCConstantExpr>(Disp)->getValue() == 0) {
    Operands.push_back(X86Operand::CreateDXReg(BaseLoc, BaseLoc));
    return false;
  }

  if (CheckBaseRegAndIndexRegAndScale(BaseReg, IndexReg, Scale, is64BitMode(),
                                      ErrMsg))
    return Error(BaseLoc, ErrMsg);

  if (SegReg || BaseReg || IndexReg)
    Operands.push_back(X86Operand::CreateMem(getPointerWidth(), SegReg, Disp,
                                             BaseReg, IndexReg, Scale, StartLoc,
                                             EndLoc));
  else
    Operands.push_back(
        X86Operand::CreateMem(getPointerWidth(), Disp, StartLoc, EndLoc));
  return false;
}
3065
3066 // Parse either a standard primary expression or a register.
parsePrimaryExpr(const MCExpr * & Res,SMLoc & EndLoc)3067 bool X86AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) {
3068 MCAsmParser &Parser = getParser();
3069 // See if this is a register first.
3070 if (getTok().is(AsmToken::Percent) ||
3071 (isParsingIntelSyntax() && getTok().is(AsmToken::Identifier) &&
3072 MatchRegisterName(Parser.getTok().getString()))) {
3073 SMLoc StartLoc = Parser.getTok().getLoc();
3074 MCRegister RegNo;
3075 if (parseRegister(RegNo, StartLoc, EndLoc))
3076 return true;
3077 Res = X86MCExpr::create(RegNo, Parser.getContext());
3078 return false;
3079 }
3080 return Parser.parsePrimaryExpr(Res, EndLoc, nullptr);
3081 }
3082
// Parse one instruction statement: pseudo prefixes ({vex}, {disp8}, ...),
// legacy prefixes (lock/rep/...), mnemonic rewrites for GAS/MASM
// compatibility hacks, and the operand list. Name is the already-lexed
// mnemonic; NameLoc is its location. Recognized operands are appended to
// Operands. Returns true on error, false otherwise.
bool X86AsmParser::ParseInstruction(ParseInstructionInfo &Info, StringRef Name,
                                    SMLoc NameLoc, OperandVector &Operands) {
  MCAsmParser &Parser = getParser();
  InstInfo = &Info;

  // Reset the forced VEX encoding.
  ForcedVEXEncoding = VEXEncoding_Default;
  ForcedDispEncoding = DispEncoding_Default;

  // Parse pseudo prefixes.
  while (true) {
    if (Name == "{") {
      // Curly-brace pseudo prefix, e.g. "{vex3} vpdpbusd ...".
      if (getLexer().isNot(AsmToken::Identifier))
        return Error(Parser.getTok().getLoc(), "Unexpected token after '{'");
      std::string Prefix = Parser.getTok().getString().lower();
      Parser.Lex(); // Eat identifier.
      if (getLexer().isNot(AsmToken::RCurly))
        return Error(Parser.getTok().getLoc(), "Expected '}'");
      Parser.Lex(); // Eat curly.

      if (Prefix == "vex")
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Prefix == "vex2")
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Prefix == "vex3")
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Prefix == "evex")
        ForcedVEXEncoding = VEXEncoding_EVEX;
      else if (Prefix == "disp8")
        ForcedDispEncoding = DispEncoding_Disp8;
      else if (Prefix == "disp32")
        ForcedDispEncoding = DispEncoding_Disp32;
      else
        return Error(NameLoc, "unknown prefix");

      // The real mnemonic (or another pseudo prefix) follows.
      NameLoc = Parser.getTok().getLoc();
      if (getLexer().is(AsmToken::LCurly)) {
        Parser.Lex();
        Name = "{";
      } else {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        Parser.Lex();
      }
      continue;
    }
    // Parse MASM style pseudo prefixes (bare identifiers, no braces).
    if (isParsingMSInlineAsm()) {
      if (Name.equals_insensitive("vex"))
        ForcedVEXEncoding = VEXEncoding_VEX;
      else if (Name.equals_insensitive("vex2"))
        ForcedVEXEncoding = VEXEncoding_VEX2;
      else if (Name.equals_insensitive("vex3"))
        ForcedVEXEncoding = VEXEncoding_VEX3;
      else if (Name.equals_insensitive("evex"))
        ForcedVEXEncoding = VEXEncoding_EVEX;

      if (ForcedVEXEncoding != VEXEncoding_Default) {
        if (getLexer().isNot(AsmToken::Identifier))
          return Error(Parser.getTok().getLoc(), "Expected identifier");
        // FIXME: The mnemonic won't match correctly if it's not in lower case.
        Name = Parser.getTok().getString();
        NameLoc = Parser.getTok().getLoc();
        Parser.Lex();
      }
    }
    break;
  }

  // Support the suffix syntax for overriding displacement size as well.
  if (Name.consume_back(".d32")) {
    ForcedDispEncoding = DispEncoding_Disp32;
  } else if (Name.consume_back(".d8")) {
    ForcedDispEncoding = DispEncoding_Disp8;
  }

  StringRef PatchedName = Name;

  // Hack to skip "short" following Jcc.
  if (isParsingIntelSyntax() &&
      (PatchedName == "jmp" || PatchedName == "jc" || PatchedName == "jnc" ||
       PatchedName == "jcxz" || PatchedName == "jecxz" ||
       (PatchedName.startswith("j") &&
        ParseConditionCode(PatchedName.substr(1)) != X86::COND_INVALID))) {
    StringRef NextTok = Parser.getTok().getString();
    if (Parser.isParsingMasm() ? NextTok.equals_insensitive("short")
                               : NextTok == "short") {
      SMLoc NameEndLoc =
          NameLoc.getFromPointer(NameLoc.getPointer() + Name.size());
      // Eat the short keyword.
      Parser.Lex();
      // MS and GAS ignore the short keyword; they both determine the jmp type
      // based on the distance of the label. (NASM does emit different code with
      // and without "short," though.)
      InstInfo->AsmRewrites->emplace_back(AOK_Skip, NameEndLoc,
                                          NextTok.size() + 1);
    }
  }

  // FIXME: Hack to recognize setneb as setne.
  if (PatchedName.startswith("set") && PatchedName.endswith("b") &&
      PatchedName != "setb" && PatchedName != "setnb")
    PatchedName = PatchedName.substr(0, Name.size()-1);

  // Sentinel: ~0U means no comparison immediate was extracted from the name.
  unsigned ComparisonPredicate = ~0U;

  // FIXME: Hack to recognize cmp<comparison code>{sh,ss,sd,ph,ps,pd}.
  if ((PatchedName.startswith("cmp") || PatchedName.startswith("vcmp")) &&
      (PatchedName.endswith("ss") || PatchedName.endswith("sd") ||
       PatchedName.endswith("sh") || PatchedName.endswith("ph") ||
       PatchedName.endswith("ps") || PatchedName.endswith("pd"))) {
    bool IsVCMP = PatchedName[0] == 'v';
    unsigned CCIdx = IsVCMP ? 4 : 3;
    // Map the condition-code string (between the mnemonic stem and the
    // two-letter type suffix) to its immediate encoding.
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(CCIdx, PatchedName.size() - 2))
      .Case("eq",       0x00)
      .Case("eq_oq",    0x00)
      .Case("lt",       0x01)
      .Case("lt_os",    0x01)
      .Case("le",       0x02)
      .Case("le_os",    0x02)
      .Case("unord",    0x03)
      .Case("unord_q",  0x03)
      .Case("neq",      0x04)
      .Case("neq_uq",   0x04)
      .Case("nlt",      0x05)
      .Case("nlt_us",   0x05)
      .Case("nle",      0x06)
      .Case("nle_us",   0x06)
      .Case("ord",      0x07)
      .Case("ord_q",    0x07)
      /* AVX only from here */
      .Case("eq_uq",    0x08)
      .Case("nge",      0x09)
      .Case("nge_us",   0x09)
      .Case("ngt",      0x0A)
      .Case("ngt_us",   0x0A)
      .Case("false",    0x0B)
      .Case("false_oq", 0x0B)
      .Case("neq_oq",   0x0C)
      .Case("ge",       0x0D)
      .Case("ge_os",    0x0D)
      .Case("gt",       0x0E)
      .Case("gt_os",    0x0E)
      .Case("true",     0x0F)
      .Case("true_uq",  0x0F)
      .Case("eq_os",    0x10)
      .Case("lt_oq",    0x11)
      .Case("le_oq",    0x12)
      .Case("unord_s",  0x13)
      .Case("neq_us",   0x14)
      .Case("nlt_uq",   0x15)
      .Case("nle_uq",   0x16)
      .Case("ord_s",    0x17)
      .Case("eq_us",    0x18)
      .Case("nge_uq",   0x19)
      .Case("ngt_uq",   0x1A)
      .Case("false_os", 0x1B)
      .Case("neq_os",   0x1C)
      .Case("ge_oq",    0x1D)
      .Case("gt_oq",    0x1E)
      .Case("true_us",  0x1F)
      .Default(~0U);
    // Non-VEX cmp only supports the first 8 codes and has no fp16 forms.
    if (CC != ~0U && (IsVCMP || CC < 8) &&
        (IsVCMP || PatchedName.back() != 'h')) {
      if (PatchedName.endswith("ss"))
        PatchedName = IsVCMP ? "vcmpss" : "cmpss";
      else if (PatchedName.endswith("sd"))
        PatchedName = IsVCMP ? "vcmpsd" : "cmpsd";
      else if (PatchedName.endswith("ps"))
        PatchedName = IsVCMP ? "vcmpps" : "cmpps";
      else if (PatchedName.endswith("pd"))
        PatchedName = IsVCMP ? "vcmppd" : "cmppd";
      else if (PatchedName.endswith("sh"))
        PatchedName = "vcmpsh";
      else if (PatchedName.endswith("ph"))
        PatchedName = "vcmpph";
      else
        llvm_unreachable("Unexpected suffix!");

      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcmp<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcmp") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("eq",    0x0) // Only allowed on unsigned. Checked below.
      .Case("lt",    0x1)
      .Case("le",    0x2)
      //.Case("false", 0x3) // Not a documented alias.
      .Case("neq",   0x4)
      .Case("nlt",   0x5)
      .Case("nle",   0x6)
      //.Case("true",  0x7) // Not a documented alias.
      .Default(~0U);
    if (CC != ~0U && (CC != 0 || SuffixSize == 2)) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcmpub" : "vpcmpb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcmpuw" : "vpcmpw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcmpud" : "vpcmpd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcmpuq" : "vpcmpq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }

  // FIXME: Hack to recognize vpcom<comparison code>{ub,uw,ud,uq,b,w,d,q}.
  if (PatchedName.startswith("vpcom") &&
      (PatchedName.back() == 'b' || PatchedName.back() == 'w' ||
       PatchedName.back() == 'd' || PatchedName.back() == 'q')) {
    unsigned SuffixSize = PatchedName.drop_back().back() == 'u' ? 2 : 1;
    unsigned CC = StringSwitch<unsigned>(
      PatchedName.slice(5, PatchedName.size() - SuffixSize))
      .Case("lt",    0x0)
      .Case("le",    0x1)
      .Case("gt",    0x2)
      .Case("ge",    0x3)
      .Case("eq",    0x4)
      .Case("neq",   0x5)
      .Case("false", 0x6)
      .Case("true",  0x7)
      .Default(~0U);
    if (CC != ~0U) {
      switch (PatchedName.back()) {
      default: llvm_unreachable("Unexpected character!");
      case 'b': PatchedName = SuffixSize == 2 ? "vpcomub" : "vpcomb"; break;
      case 'w': PatchedName = SuffixSize == 2 ? "vpcomuw" : "vpcomw"; break;
      case 'd': PatchedName = SuffixSize == 2 ? "vpcomud" : "vpcomd"; break;
      case 'q': PatchedName = SuffixSize == 2 ? "vpcomuq" : "vpcomq"; break;
      }
      // Set up the immediate to push into the operands later.
      ComparisonPredicate = CC;
    }
  }


  // Determine whether this is an instruction prefix.
  // FIXME:
  // Enhance prefixes integrity robustness. for example, following forms
  // are currently tolerated:
  // repz repnz <insn>    ; GAS errors for the use of two similar prefixes
  // lock addq %rax, %rbx ; Destination operand must be of memory type
  // xacquire <insn>      ; xacquire must be accompanied by 'lock'
  bool IsPrefix =
      StringSwitch<bool>(Name)
          .Cases("cs", "ds", "es", "fs", "gs", "ss", true)
          .Cases("rex64", "data32", "data16", "addr32", "addr16", true)
          .Cases("xacquire", "xrelease", true)
          .Cases("acquire", "release", isParsingIntelSyntax())
          .Default(false);

  auto isLockRepeatNtPrefix = [](StringRef N) {
    return StringSwitch<bool>(N)
        .Cases("lock", "rep", "repe", "repz", "repne", "repnz", "notrack", true)
        .Default(false);
  };

  bool CurlyAsEndOfStatement = false;

  unsigned Flags = X86::IP_NO_PREFIX;
  // Consume any run of lock/rep/notrack prefixes, accumulating their flags.
  while (isLockRepeatNtPrefix(Name.lower())) {
    unsigned Prefix =
        StringSwitch<unsigned>(Name)
            .Cases("lock", "lock", X86::IP_HAS_LOCK)
            .Cases("rep", "repe", "repz", X86::IP_HAS_REPEAT)
            .Cases("repne", "repnz", X86::IP_HAS_REPEAT_NE)
            .Cases("notrack", "notrack", X86::IP_HAS_NOTRACK)
            .Default(X86::IP_NO_PREFIX); // Invalid prefix (impossible)
    Flags |= Prefix;
    if (getLexer().is(AsmToken::EndOfStatement)) {
      // We don't have real instr with the given prefix
      // let's use the prefix as the instr.
      // TODO: there could be several prefixes one after another
      Flags = X86::IP_NO_PREFIX;
      break;
    }
    // FIXME: The mnemonic won't match correctly if it's not in lower case.
    Name = Parser.getTok().getString();
    Parser.Lex(); // eat the prefix
    // Hack: we could have something like "rep # some comment" or
    //    "lock; cmpxchg16b $1" or "lock\0A\09incl" or "lock/incl"
    while (Name.startswith(";") || Name.startswith("\n") ||
           Name.startswith("#") || Name.startswith("\t") ||
           Name.startswith("/")) {
      // FIXME: The mnemonic won't match correctly if it's not in lower case.
      Name = Parser.getTok().getString();
      Parser.Lex(); // go to next prefix or instr
    }
  }

  if (Flags)
    PatchedName = Name;

  // Hacks to handle 'data16' and 'data32'
  if (PatchedName == "data16" && is16BitMode()) {
    return Error(NameLoc, "redundant data16 prefix");
  }
  if (PatchedName == "data32") {
    if (is32BitMode())
      return Error(NameLoc, "redundant data32 prefix");
    if (is64BitMode())
      return Error(NameLoc, "'data32' is not supported in 64-bit mode");
    // Hack to 'data16' for the table lookup.
    PatchedName = "data16";

    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      StringRef Next = Parser.getTok().getString();
      getLexer().Lex();
      // data32 effectively changes the instruction suffix.
      // TODO Generalize.
      if (Next == "callw")
        Next = "calll";
      if (Next == "ljmpw")
        Next = "ljmpl";

      Name = Next;
      PatchedName = Name;
      ForcedDataPrefix = X86::Is32Bit;
      IsPrefix = false;
    }
  }

  Operands.push_back(X86Operand::CreateToken(PatchedName, NameLoc));

  // Push the immediate if we extracted one from the mnemonic.
  // (AT&T syntax puts the immediate first; Intel syntax appends it last,
  // see the matching block after operand parsing below.)
  if (ComparisonPredicate != ~0U && !isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // This does the actual operand parsing.  Don't parse any more if we have a
  // prefix juxtaposed with an operation like "lock incl 4(%rax)", because we
  // just want to parse the "lock" as the first instruction and the "incl" as
  // the next one.
  if (getLexer().isNot(AsmToken::EndOfStatement) && !IsPrefix) {
    // Parse '*' modifier.
    if (getLexer().is(AsmToken::Star))
      Operands.push_back(X86Operand::CreateToken("*", consumeToken()));

    // Read the operands.
    while (true) {
      if (parseOperand(Operands, Name))
        return true;
      if (HandleAVX512Operand(Operands))
        return true;

      // check for comma and eat it
      if (getLexer().is(AsmToken::Comma))
        Parser.Lex();
      else
        break;
     }

    // In MS inline asm curly braces mark the beginning/end of a block,
    // therefore they should be interpreted as end of statement
    CurlyAsEndOfStatement =
        isParsingIntelSyntax() && isParsingMSInlineAsm() &&
        (getLexer().is(AsmToken::LCurly) || getLexer().is(AsmToken::RCurly));
    if (getLexer().isNot(AsmToken::EndOfStatement) && !CurlyAsEndOfStatement)
      return TokError("unexpected token in argument list");
  }

  // Push the immediate if we extracted one from the mnemonic.
  if (ComparisonPredicate != ~0U && isParsingIntelSyntax()) {
    const MCExpr *ImmOp = MCConstantExpr::create(ComparisonPredicate,
                                                 getParser().getContext());
    Operands.push_back(X86Operand::CreateImm(ImmOp, NameLoc, NameLoc));
  }

  // Consume the EndOfStatement or the prefix separator Slash
  if (getLexer().is(AsmToken::EndOfStatement) ||
      (IsPrefix && getLexer().is(AsmToken::Slash)))
    Parser.Lex();
  else if (CurlyAsEndOfStatement)
    // Add an actual EndOfStatement before the curly brace
    Info.AsmRewrites->emplace_back(AOK_EndOfStatement,
                                   getLexer().getTok().getLoc(), 0);

  // This is for gas compatibility and cannot be done in td.
  // Adding "p" for some floating point with no argument.
  // For example: fsub --> fsubp
  bool IsFp =
    Name == "fsub" || Name == "fdiv" || Name == "fsubr" || Name == "fdivr";
  if (IsFp && Operands.size() == 1) {
    const char *Repl = StringSwitch<const char *>(Name)
      .Case("fsub", "fsubp")
      .Case("fdiv", "fdivp")
      .Case("fsubr", "fsubrp")
      .Case("fdivr", "fdivrp");
    static_cast<X86Operand &>(*Operands[0]).setTokenValue(Repl);
  }

  if ((Name == "mov" || Name == "movw" || Name == "movl") &&
      (Operands.size() == 3)) {
    X86Operand &Op1 = (X86Operand &)*Operands[1];
    X86Operand &Op2 = (X86Operand &)*Operands[2];
    SMLoc Loc = Op1.getEndLoc();
    // Moving a 32 or 16 bit value into a segment register has the same
    // behavior. Modify such instructions to always take shorter form.
    if (Op1.isReg() && Op2.isReg() &&
        X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(
            Op2.getReg()) &&
        (X86MCRegisterClasses[X86::GR16RegClassID].contains(Op1.getReg()) ||
         X86MCRegisterClasses[X86::GR32RegClassID].contains(Op1.getReg()))) {
      // Change instruction name to match new instruction.
      if (Name != "mov" && Name[3] == (is16BitMode() ? 'l' : 'w')) {
        Name = is16BitMode() ? "movw" : "movl";
        Operands[0] = X86Operand::CreateToken(Name, NameLoc);
      }
      // Select the correct equivalent 16-/32-bit source register.
      unsigned Reg =
          getX86SubSuperRegisterOrZero(Op1.getReg(), is16BitMode() ? 16 : 32);
      Operands[1] = X86Operand::CreateReg(Reg, Loc, Loc);
    }
  }

  // This is a terrible hack to handle "out[s]?[bwl]? %al, (%dx)" ->
  // "outb %al, %dx".  Out doesn't take a memory form, but this is a widely
  // documented form in various unofficial manuals, so a lot of code uses it.
  if ((Name == "outb" || Name == "outsb" || Name == "outw" || Name == "outsw" ||
       Name == "outl" || Name == "outsl" || Name == "out" || Name == "outs") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands.back();
    if (Op.isDXReg())
      Operands.back() = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                              Op.getEndLoc());
  }
  // Same hack for "in[s]?[bwl]? (%dx), %al" -> "inb %dx, %al".
  if ((Name == "inb" || Name == "insb" || Name == "inw" || Name == "insw" ||
       Name == "inl" || Name == "insl" || Name == "in" || Name == "ins") &&
      Operands.size() == 3) {
    X86Operand &Op = (X86Operand &)*Operands[1];
    if (Op.isDXReg())
      Operands[1] = X86Operand::CreateReg(X86::DX, Op.getStartLoc(),
                                          Op.getEndLoc());
  }

  SmallVector<std::unique_ptr<MCParsedAsmOperand>, 2> TmpOperands;
  bool HadVerifyError = false;

  // Append default arguments to "ins[bwld]"
  if (Name.startswith("ins") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "insb" || Name == "insw" || Name == "insl" || Name == "insd" ||
       Name == "ins")) {

    AddDefaultSrcDestOperands(TmpOperands,
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Append default arguments to "outs[bwld]"
  if (Name.startswith("outs") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "outsb" || Name == "outsw" || Name == "outsl" ||
       Name == "outsd" || Name == "outs")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              X86Operand::CreateReg(X86::DX, NameLoc, NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "lods[bwlq]" into "lods[bwlq] ($SIREG)" for appropriate
  // values of $SIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("lods") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "lods" || Name == "lodsb" || Name == "lodsw" ||
       Name == "lodsl" || Name == "lodsd" || Name == "lodsq")) {
    TmpOperands.push_back(DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "stos[bwlq]" into "stos[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("stos") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "stos" || Name == "stosb" || Name == "stosw" ||
       Name == "stosl" || Name == "stosd" || Name == "stosq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Transform "scas[bwlq]" into "scas[bwlq] ($DIREG)" for appropriate
  // values of $DIREG according to the mode. It would be nice if this
  // could be achieved with InstAlias in the tables.
  if (Name.startswith("scas") &&
      (Operands.size() == 1 || Operands.size() == 2) &&
      (Name == "scas" || Name == "scasb" || Name == "scasw" ||
       Name == "scasl" || Name == "scasd" || Name == "scasq")) {
    TmpOperands.push_back(DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "cmps[bwlq]".
  if (Name.startswith("cmps") &&
      (Operands.size() == 1 || Operands.size() == 3) &&
      (Name == "cmps" || Name == "cmpsb" || Name == "cmpsw" ||
       Name == "cmpsl" || Name == "cmpsd" || Name == "cmpsq")) {
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemDIOperand(NameLoc),
                              DefaultMemSIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Add default SI and DI operands to "movs[bwlq]".
  if (((Name.startswith("movs") &&
        (Name == "movs" || Name == "movsb" || Name == "movsw" ||
         Name == "movsl" || Name == "movsd" || Name == "movsq")) ||
       (Name.startswith("smov") &&
        (Name == "smov" || Name == "smovb" || Name == "smovw" ||
         Name == "smovl" || Name == "smovd" || Name == "smovq"))) &&
      (Operands.size() == 1 || Operands.size() == 3)) {
    // AT&T "movsd" with no operands is the string move, not the SSE movsd.
    if (Name == "movsd" && Operands.size() == 1 && !isParsingIntelSyntax())
      Operands.back() = X86Operand::CreateToken("movsl", NameLoc);
    AddDefaultSrcDestOperands(TmpOperands, DefaultMemSIOperand(NameLoc),
                              DefaultMemDIOperand(NameLoc));
    HadVerifyError = VerifyAndAdjustOperands(Operands, TmpOperands);
  }

  // Check if we encountered an error for one of the string instructions
  if (HadVerifyError) {
    return HadVerifyError;
  }

  // Transforms "xlat mem8" into "xlatb"
  if ((Name == "xlat" || Name == "xlatb") && Operands.size() == 2) {
    X86Operand &Op1 = static_cast<X86Operand &>(*Operands[1]);
    if (Op1.isMem8()) {
      Warning(Op1.getStartLoc(), "memory operand is only for determining the "
                                 "size, (R|E)BX will be used for the location");
      Operands.pop_back();
      static_cast<X86Operand &>(*Operands[0]).setTokenValue("xlatb");
    }
  }

  if (Flags)
    Operands.push_back(X86Operand::CreatePrefix(Flags, NameLoc, NameLoc));
  return false;
}
3633
/// Post-process a successfully matched instruction: select a smaller or
/// alternate encoding, or honor a forced {disp32} displacement encoding.
/// Returns true if Inst was changed; the caller loops until no change occurs,
/// so individual transformations can chain off each other.
bool X86AsmParser::processInstruction(MCInst &Inst, const OperandVector &Ops) {
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();

  switch (Inst.getOpcode()) {
  default: return false;
  case X86::JMP_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JMP_2 : X86::JMP_4);
      return true;
    }

    return false;
  case X86::JCC_1:
    // {disp32} forces a larger displacement as if the instruction was relaxed.
    // NOTE: 16-bit mode uses 16-bit displacement even though it says {disp32}.
    // This matches GNU assembler.
    if (ForcedDispEncoding == DispEncoding_Disp32) {
      Inst.setOpcode(is16BitMode() ? X86::JCC_2 : X86::JCC_4);
      return true;
    }

    return false;
  case X86::VMOVZPQILo2PQIrr:
  case X86::VMOVAPDrr:
  case X86::VMOVAPDYrr:
  case X86::VMOVAPSrr:
  case X86::VMOVAPSYrr:
  case X86::VMOVDQArr:
  case X86::VMOVDQAYrr:
  case X86::VMOVDQUrr:
  case X86::VMOVDQUYrr:
  case X86::VMOVUPDrr:
  case X86::VMOVUPDYrr:
  case X86::VMOVUPSrr:
  case X86::VMOVUPSYrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't. Switching to the reversed
    // (_REV) form swaps which operand needs the extension bit.
    // Skipped when the user forced {vex3}, since the point is to allow VEX2.
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(1).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVZPQILo2PQIrr: NewOpc = X86::VMOVPQI2QIrr;   break;
    case X86::VMOVAPDrr:        NewOpc = X86::VMOVAPDrr_REV;  break;
    case X86::VMOVAPDYrr:       NewOpc = X86::VMOVAPDYrr_REV; break;
    case X86::VMOVAPSrr:        NewOpc = X86::VMOVAPSrr_REV;  break;
    case X86::VMOVAPSYrr:       NewOpc = X86::VMOVAPSYrr_REV; break;
    case X86::VMOVDQArr:        NewOpc = X86::VMOVDQArr_REV;  break;
    case X86::VMOVDQAYrr:       NewOpc = X86::VMOVDQAYrr_REV; break;
    case X86::VMOVDQUrr:        NewOpc = X86::VMOVDQUrr_REV;  break;
    case X86::VMOVDQUYrr:       NewOpc = X86::VMOVDQUYrr_REV; break;
    case X86::VMOVUPDrr:        NewOpc = X86::VMOVUPDrr_REV;  break;
    case X86::VMOVUPDYrr:       NewOpc = X86::VMOVUPDYrr_REV; break;
    case X86::VMOVUPSrr:        NewOpc = X86::VMOVUPSrr_REV;  break;
    case X86::VMOVUPSYrr:       NewOpc = X86::VMOVUPSYrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  case X86::VMOVSDrr:
  case X86::VMOVSSrr: {
    // We can get a smaller encoding by using VEX.R instead of VEX.B if one of
    // the registers is extended, but other isn't. These are three-operand
    // forms, so the second source is operand 2 (not 1 as above).
    if (ForcedVEXEncoding == VEXEncoding_VEX3 ||
        MRI->getEncodingValue(Inst.getOperand(0).getReg()) >= 8 ||
        MRI->getEncodingValue(Inst.getOperand(2).getReg()) < 8)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::VMOVSDrr: NewOpc = X86::VMOVSDrr_REV; break;
    case X86::VMOVSSrr: NewOpc = X86::VMOVSSrr_REV; break;
    }
    Inst.setOpcode(NewOpc);
    return true;
  }
  case X86::RCR8ri: case X86::RCR16ri: case X86::RCR32ri: case X86::RCR64ri:
  case X86::RCL8ri: case X86::RCL16ri: case X86::RCL32ri: case X86::RCL64ri:
  case X86::ROR8ri: case X86::ROR16ri: case X86::ROR32ri: case X86::ROR64ri:
  case X86::ROL8ri: case X86::ROL16ri: case X86::ROL32ri: case X86::ROL64ri:
  case X86::SAR8ri: case X86::SAR16ri: case X86::SAR32ri: case X86::SAR64ri:
  case X86::SHR8ri: case X86::SHR16ri: case X86::SHR32ri: case X86::SHR64ri:
  case X86::SHL8ri: case X86::SHL16ri: case X86::SHL32ri: case X86::SHL64ri: {
    // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
    // The shift-by-1 form has no immediate byte, saving one byte of encoding.
    // FIXME: It would be great if we could just do this with an InstAlias.
    if (!Inst.getOperand(2).isImm() || Inst.getOperand(2).getImm() != 1)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::RCR8ri:  NewOpc = X86::RCR8r1;  break;
    case X86::RCR16ri: NewOpc = X86::RCR16r1; break;
    case X86::RCR32ri: NewOpc = X86::RCR32r1; break;
    case X86::RCR64ri: NewOpc = X86::RCR64r1; break;
    case X86::RCL8ri:  NewOpc = X86::RCL8r1;  break;
    case X86::RCL16ri: NewOpc = X86::RCL16r1; break;
    case X86::RCL32ri: NewOpc = X86::RCL32r1; break;
    case X86::RCL64ri: NewOpc = X86::RCL64r1; break;
    case X86::ROR8ri:  NewOpc = X86::ROR8r1;  break;
    case X86::ROR16ri: NewOpc = X86::ROR16r1; break;
    case X86::ROR32ri: NewOpc = X86::ROR32r1; break;
    case X86::ROR64ri: NewOpc = X86::ROR64r1; break;
    case X86::ROL8ri:  NewOpc = X86::ROL8r1;  break;
    case X86::ROL16ri: NewOpc = X86::ROL16r1; break;
    case X86::ROL32ri: NewOpc = X86::ROL32r1; break;
    case X86::ROL64ri: NewOpc = X86::ROL64r1; break;
    case X86::SAR8ri:  NewOpc = X86::SAR8r1;  break;
    case X86::SAR16ri: NewOpc = X86::SAR16r1; break;
    case X86::SAR32ri: NewOpc = X86::SAR32r1; break;
    case X86::SAR64ri: NewOpc = X86::SAR64r1; break;
    case X86::SHR8ri:  NewOpc = X86::SHR8r1;  break;
    case X86::SHR16ri: NewOpc = X86::SHR16r1; break;
    case X86::SHR32ri: NewOpc = X86::SHR32r1; break;
    case X86::SHR64ri: NewOpc = X86::SHR64r1; break;
    case X86::SHL8ri:  NewOpc = X86::SHL8r1;  break;
    case X86::SHL16ri: NewOpc = X86::SHL16r1; break;
    case X86::SHL32ri: NewOpc = X86::SHL32r1; break;
    case X86::SHL64ri: NewOpc = X86::SHL64r1; break;
    }

    // Rebuild with only the register destination operands; the $1 immediate
    // is implicit in the r1 form.
    MCInst TmpInst;
    TmpInst.setOpcode(NewOpc);
    TmpInst.addOperand(Inst.getOperand(0));
    TmpInst.addOperand(Inst.getOperand(1));
    Inst = TmpInst;
    return true;
  }
  case X86::RCR8mi: case X86::RCR16mi: case X86::RCR32mi: case X86::RCR64mi:
  case X86::RCL8mi: case X86::RCL16mi: case X86::RCL32mi: case X86::RCL64mi:
  case X86::ROR8mi: case X86::ROR16mi: case X86::ROR32mi: case X86::ROR64mi:
  case X86::ROL8mi: case X86::ROL16mi: case X86::ROL32mi: case X86::ROL64mi:
  case X86::SAR8mi: case X86::SAR16mi: case X86::SAR32mi: case X86::SAR64mi:
  case X86::SHR8mi: case X86::SHR16mi: case X86::SHR32mi: case X86::SHR64mi:
  case X86::SHL8mi: case X86::SHL16mi: case X86::SHL32mi: case X86::SHL64mi: {
    // Optimize s{hr,ar,hl} $1, <op> to "shift <op>". Similar for rotate.
    // Memory-destination variant; the immediate follows the memory operands.
    // FIXME: It would be great if we could just do this with an InstAlias.
    if (!Inst.getOperand(X86::AddrNumOperands).isImm() ||
        Inst.getOperand(X86::AddrNumOperands).getImm() != 1)
      return false;

    unsigned NewOpc;
    switch (Inst.getOpcode()) {
    default: llvm_unreachable("Invalid opcode");
    case X86::RCR8mi:  NewOpc = X86::RCR8m1;  break;
    case X86::RCR16mi: NewOpc = X86::RCR16m1; break;
    case X86::RCR32mi: NewOpc = X86::RCR32m1; break;
    case X86::RCR64mi: NewOpc = X86::RCR64m1; break;
    case X86::RCL8mi:  NewOpc = X86::RCL8m1;  break;
    case X86::RCL16mi: NewOpc = X86::RCL16m1; break;
    case X86::RCL32mi: NewOpc = X86::RCL32m1; break;
    case X86::RCL64mi: NewOpc = X86::RCL64m1; break;
    case X86::ROR8mi:  NewOpc = X86::ROR8m1;  break;
    case X86::ROR16mi: NewOpc = X86::ROR16m1; break;
    case X86::ROR32mi: NewOpc = X86::ROR32m1; break;
    case X86::ROR64mi: NewOpc = X86::ROR64m1; break;
    case X86::ROL8mi:  NewOpc = X86::ROL8m1;  break;
    case X86::ROL16mi: NewOpc = X86::ROL16m1; break;
    case X86::ROL32mi: NewOpc = X86::ROL32m1; break;
    case X86::ROL64mi: NewOpc = X86::ROL64m1; break;
    case X86::SAR8mi:  NewOpc = X86::SAR8m1;  break;
    case X86::SAR16mi: NewOpc = X86::SAR16m1; break;
    case X86::SAR32mi: NewOpc = X86::SAR32m1; break;
    case X86::SAR64mi: NewOpc = X86::SAR64m1; break;
    case X86::SHR8mi:  NewOpc = X86::SHR8m1;  break;
    case X86::SHR16mi: NewOpc = X86::SHR16m1; break;
    case X86::SHR32mi: NewOpc = X86::SHR32m1; break;
    case X86::SHR64mi: NewOpc = X86::SHR64m1; break;
    case X86::SHL8mi:  NewOpc = X86::SHL8m1;  break;
    case X86::SHL16mi: NewOpc = X86::SHL16m1; break;
    case X86::SHL32mi: NewOpc = X86::SHL32m1; break;
    case X86::SHL64mi: NewOpc = X86::SHL64m1; break;
    }

    // Keep only the AddrNumOperands memory operands; drop the $1 immediate.
    MCInst TmpInst;
    TmpInst.setOpcode(NewOpc);
    for (int i = 0; i != X86::AddrNumOperands; ++i)
      TmpInst.addOperand(Inst.getOperand(i));
    Inst = TmpInst;
    return true;
  }
  case X86::INT: {
    // Transforms "int $3" into "int3" as a size optimization. We can't write an
    // instalias with an immediate operand yet.
    if (!Inst.getOperand(0).isImm() || Inst.getOperand(0).getImm() != 3)
      return false;

    MCInst TmpInst;
    TmpInst.setOpcode(X86::INT3);
    Inst = TmpInst;
    return true;
  }
  }
}
3835
/// Target-specific semantic checks run after a successful match but before
/// emission. Returns true on a hard error; some dubious-but-legal register
/// combinations only produce warnings (Warning's return value is propagated).
bool X86AsmParser::validateInstruction(MCInst &Inst, const OperandVector &Ops) {
  using namespace X86;
  const MCRegisterInfo *MRI = getContext().getRegisterInfo();
  unsigned Opcode = Inst.getOpcode();
  uint64_t TSFlags = MII.get(Opcode).TSFlags;
  if (isVFCMADDCPH(Opcode) || isVFCMADDCSH(Opcode) || isVFMADDCPH(Opcode) ||
      isVFMADDCSH(Opcode)) {
    // Complex FMA: destination must not alias any source register.
    unsigned Dest = Inst.getOperand(0).getReg();
    for (unsigned i = 2; i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isVFCMULCPH(Opcode) || isVFCMULCSH(Opcode) || isVFMULCPH(Opcode) ||
             isVFMULCSH(Opcode)) {
    unsigned Dest = Inst.getOperand(0).getReg();
    // The mask variants have different operand list. Scan from the third
    // operand to avoid emitting incorrect warning.
    //     VFMULCPHZrr   Dest, Src1, Src2
    //     VFMULCPHZrrk  Dest, Dest, Mask, Src1, Src2
    //     VFMULCPHZrrkz Dest, Mask, Src1, Src2
    for (unsigned i = TSFlags & X86II::EVEX_K ? 2 : 1;
         i < Inst.getNumOperands(); i++)
      if (Inst.getOperand(i).isReg() && Dest == Inst.getOperand(i).getReg())
        return Warning(Ops[0]->getStartLoc(), "Destination register should be "
                                              "distinct from source registers");
  } else if (isV4FMADDPS(Opcode) || isV4FMADDSS(Opcode) ||
             isV4FNMADDPS(Opcode) || isV4FNMADDSS(Opcode) ||
             isVP4DPWSSDS(Opcode) || isVP4DPWSSD(Opcode)) {
    // These read a group of four consecutive registers starting at a
    // multiple-of-4 boundary; warn if the named register isn't the first of
    // its group, since the whole group is used implicitly.
    unsigned Src2 = Inst.getOperand(Inst.getNumOperands() -
                                    X86::AddrNumOperands - 1).getReg();
    unsigned Src2Enc = MRI->getEncodingValue(Src2);
    if (Src2Enc % 4 != 0) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(Src2);
      unsigned GroupStart = (Src2Enc / 4) * 4;
      unsigned GroupEnd = GroupStart + 3;
      return Warning(Ops[0]->getStartLoc(),
                     "source register '" + RegName + "' implicitly denotes '" +
                     RegName.take_front(3) + Twine(GroupStart) + "' to '" +
                     RegName.take_front(3) + Twine(GroupEnd) +
                     "' source group");
    }
  } else if (isVGATHERDPD(Opcode) || isVGATHERDPS(Opcode) ||
             isVGATHERQPD(Opcode) || isVGATHERQPS(Opcode) ||
             isVPGATHERDD(Opcode) || isVPGATHERDQ(Opcode) ||
             isVPGATHERQD(Opcode) || isVPGATHERQQ(Opcode)) {
    bool HasEVEX = (TSFlags & X86II::EncodingMask) == X86II::EVEX;
    if (HasEVEX) {
      // EVEX gathers: memory operand starts at index 4 here, and there is no
      // separate vector mask register operand to check.
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(4 + X86::AddrIndexReg).getReg());
      if (Dest == Index)
        return Warning(Ops[0]->getStartLoc(), "index and destination registers "
                                              "should be distinct");
    } else {
      // VEX gathers: operand 1 is the vector mask; memory starts at index 3.
      unsigned Dest = MRI->getEncodingValue(Inst.getOperand(0).getReg());
      unsigned Mask = MRI->getEncodingValue(Inst.getOperand(1).getReg());
      unsigned Index = MRI->getEncodingValue(
          Inst.getOperand(3 + X86::AddrIndexReg).getReg());
      if (Dest == Mask || Dest == Index || Mask == Index)
        return Warning(Ops[0]->getStartLoc(), "mask, index, and destination "
                                              "registers should be distinct");
    }
  }

  // Check that we aren't mixing AH/BH/CH/DH with REX prefix. We only need to
  // check this with the legacy encoding, VEX/EVEX/XOP don't use REX.
  if ((TSFlags & X86II::EncodingMask) == 0) {
    MCPhysReg HReg = X86::NoRegister;
    bool UsesRex = TSFlags & X86II::REX_W;
    unsigned NumOps = Inst.getNumOperands();
    for (unsigned i = 0; i != NumOps; ++i) {
      const MCOperand &MO = Inst.getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (Reg == X86::AH || Reg == X86::BH || Reg == X86::CH || Reg == X86::DH)
        HReg = Reg;
      // SPL/BPL/SIL/DIL and R8-R15 (and their sub-registers) all require a
      // REX prefix, which cannot encode the high-byte registers.
      if (X86II::isX86_64NonExtLowByteReg(Reg) ||
          X86II::isX86_64ExtendedReg(Reg))
        UsesRex = true;
    }

    if (UsesRex && HReg != X86::NoRegister) {
      StringRef RegName = X86IntelInstPrinter::getRegisterName(HReg);
      return Error(Ops[0]->getStartLoc(),
                   "can't encode '" + RegName + "' in an instruction requiring "
                   "REX prefix");
    }
  }

  return false;
}
3928
3929 static const char *getSubtargetFeatureName(uint64_t Val);
3930
emitWarningForSpecialLVIInstruction(SMLoc Loc)3931 void X86AsmParser::emitWarningForSpecialLVIInstruction(SMLoc Loc) {
3932 Warning(Loc, "Instruction may be vulnerable to LVI and "
3933 "requires manual mitigation");
3934 Note(SMLoc(), "See https://software.intel.com/"
3935 "security-software-guidance/insights/"
3936 "deep-dive-load-value-injection#specialinstructions"
3937 " for more information");
3938 }
3939
/// RET instructions and also instructions that indirect calls/jumps from memory
/// combine a load and a branch within a single instruction. To mitigate these
/// instructions against LVI, they must be decomposed into separate load and
/// branch instructions, with an LFENCE in between. For more details, see:
/// - X86LoadValueInjectionRetHardening.cpp
/// - X86LoadValueInjectionIndirectThunks.cpp
/// - https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Emits the mitigation (or a warning) for the instructions it recognizes;
/// other instructions are left untouched.
void X86AsmParser::applyLVICFIMitigation(MCInst &Inst, MCStreamer &Out) {
  // Information on control-flow instructions that require manual mitigation can
  // be found here:
  // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
  switch (Inst.getOpcode()) {
  case X86::RET16:
  case X86::RET32:
  case X86::RET64:
  case X86::RETI16:
  case X86::RETI32:
  case X86::RETI64: {
    // Before the RET (which the caller emits after this hook returns), emit
    //   shl $0, (%sp)    ; no-op touch of the return-address slot
    //   lfence
    // so the return-address load is fenced against mis-speculation.
    MCInst ShlInst, FenceInst;
    bool Parse32 = is32BitMode() || Code16GCC;
    // Pick the stack-pointer register matching the current mode.
    unsigned Basereg =
        is64BitMode() ? X86::RSP : (Parse32 ? X86::ESP : X86::SP);
    const MCExpr *Disp = MCConstantExpr::create(0, getContext());
    auto ShlMemOp = X86Operand::CreateMem(getPointerWidth(), /*SegReg=*/0, Disp,
                                          /*BaseReg=*/Basereg, /*IndexReg=*/0,
                                          /*Scale=*/1, SMLoc{}, SMLoc{}, 0);
    ShlInst.setOpcode(X86::SHL64mi);
    ShlMemOp->addMemOperands(ShlInst, 5);
    ShlInst.addOperand(MCOperand::createImm(0));
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(ShlInst, getSTI());
    Out.emitInstruction(FenceInst, getSTI());
    return;
  }
  case X86::JMP16m:
  case X86::JMP32m:
  case X86::JMP64m:
  case X86::CALL16m:
  case X86::CALL32m:
  case X86::CALL64m:
    // Memory-indirect branches combine the load and branch in one
    // instruction; they cannot be fenced here, so only warn.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }
}
3986
/// To mitigate LVI, every instruction that performs a load can be followed by
/// an LFENCE instruction to squash any potential mis-speculation. There are
/// some instructions that require additional considerations, and may require
/// manual mitigation. For more details, see:
/// https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection
///
/// Emits an LFENCE after eligible loads; warns where only manual mitigation
/// is possible.
void X86AsmParser::applyLVILoadHardeningMitigation(MCInst &Inst,
                                                   MCStreamer &Out) {
  auto Opcode = Inst.getOpcode();
  auto Flags = Inst.getFlags();
  if ((Flags & X86::IP_HAS_REPEAT) || (Flags & X86::IP_HAS_REPEAT_NE)) {
    // Information on REP string instructions that require manual mitigation can
    // be found here:
    // https://software.intel.com/security-software-guidance/insights/deep-dive-load-value-injection#specialinstructions
    switch (Opcode) {
    case X86::CMPSB:
    case X86::CMPSW:
    case X86::CMPSL:
    case X86::CMPSQ:
    case X86::SCASB:
    case X86::SCASW:
    case X86::SCASL:
    case X86::SCASQ:
      emitWarningForSpecialLVIInstruction(Inst.getLoc());
      return;
    }
  } else if (Opcode == X86::REP_PREFIX || Opcode == X86::REPNE_PREFIX) {
    // If a REP instruction is found on its own line, it may or may not be
    // followed by a vulnerable instruction. Emit a warning just in case.
    emitWarningForSpecialLVIInstruction(Inst.getLoc());
    return;
  }

  const MCInstrDesc &MCID = MII.get(Inst.getOpcode());

  // Can't mitigate after terminators or calls. A control flow change may have
  // already occurred.
  if (MCID.isTerminator() || MCID.isCall())
    return;

  // LFENCE has the mayLoad property, don't double fence.
  if (MCID.mayLoad() && Inst.getOpcode() != X86::LFENCE) {
    // Note: Inst itself was already emitted by the caller; this appends the
    // fence immediately after it.
    MCInst FenceInst;
    FenceInst.setOpcode(X86::LFENCE);
    Out.emitInstruction(FenceInst, getSTI());
  }
}
4035
emitInstruction(MCInst & Inst,OperandVector & Operands,MCStreamer & Out)4036 void X86AsmParser::emitInstruction(MCInst &Inst, OperandVector &Operands,
4037 MCStreamer &Out) {
4038 if (LVIInlineAsmHardening &&
4039 getSTI().getFeatureBits()[X86::FeatureLVIControlFlowIntegrity])
4040 applyLVICFIMitigation(Inst, Out);
4041
4042 Out.emitInstruction(Inst, getSTI());
4043
4044 if (LVIInlineAsmHardening &&
4045 getSTI().getFeatureBits()[X86::FeatureLVILoadHardening])
4046 applyLVILoadHardeningMitigation(Inst, Out);
4047 }
4048
MatchAndEmitInstruction(SMLoc IDLoc,unsigned & Opcode,OperandVector & Operands,MCStreamer & Out,uint64_t & ErrorInfo,bool MatchingInlineAsm)4049 bool X86AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode,
4050 OperandVector &Operands,
4051 MCStreamer &Out, uint64_t &ErrorInfo,
4052 bool MatchingInlineAsm) {
4053 if (isParsingIntelSyntax())
4054 return MatchAndEmitIntelInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4055 MatchingInlineAsm);
4056 return MatchAndEmitATTInstruction(IDLoc, Opcode, Operands, Out, ErrorInfo,
4057 MatchingInlineAsm);
4058 }
4059
MatchFPUWaitAlias(SMLoc IDLoc,X86Operand & Op,OperandVector & Operands,MCStreamer & Out,bool MatchingInlineAsm)4060 void X86AsmParser::MatchFPUWaitAlias(SMLoc IDLoc, X86Operand &Op,
4061 OperandVector &Operands, MCStreamer &Out,
4062 bool MatchingInlineAsm) {
4063 // FIXME: This should be replaced with a real .td file alias mechanism.
4064 // Also, MatchInstructionImpl should actually *do* the EmitInstruction
4065 // call.
4066 const char *Repl = StringSwitch<const char *>(Op.getToken())
4067 .Case("finit", "fninit")
4068 .Case("fsave", "fnsave")
4069 .Case("fstcw", "fnstcw")
4070 .Case("fstcww", "fnstcw")
4071 .Case("fstenv", "fnstenv")
4072 .Case("fstsw", "fnstsw")
4073 .Case("fstsww", "fnstsw")
4074 .Case("fclex", "fnclex")
4075 .Default(nullptr);
4076 if (Repl) {
4077 MCInst Inst;
4078 Inst.setOpcode(X86::WAIT);
4079 Inst.setLoc(IDLoc);
4080 if (!MatchingInlineAsm)
4081 emitInstruction(Inst, Operands, Out);
4082 Operands[0] = X86Operand::CreateToken(Repl, IDLoc);
4083 }
4084 }
4085
ErrorMissingFeature(SMLoc IDLoc,const FeatureBitset & MissingFeatures,bool MatchingInlineAsm)4086 bool X86AsmParser::ErrorMissingFeature(SMLoc IDLoc,
4087 const FeatureBitset &MissingFeatures,
4088 bool MatchingInlineAsm) {
4089 assert(MissingFeatures.any() && "Unknown missing feature!");
4090 SmallString<126> Msg;
4091 raw_svector_ostream OS(Msg);
4092 OS << "instruction requires:";
4093 for (unsigned i = 0, e = MissingFeatures.size(); i != e; ++i) {
4094 if (MissingFeatures[i])
4095 OS << ' ' << getSubtargetFeatureName(i);
4096 }
4097 return Error(IDLoc, OS.str(), SMRange(), MatchingInlineAsm);
4098 }
4099
getPrefixes(OperandVector & Operands)4100 static unsigned getPrefixes(OperandVector &Operands) {
4101 unsigned Result = 0;
4102 X86Operand &Prefix = static_cast<X86Operand &>(*Operands.back());
4103 if (Prefix.isPrefix()) {
4104 Result = Prefix.getPrefix();
4105 Operands.pop_back();
4106 }
4107 return Result;
4108 }
4109
checkTargetMatchPredicate(MCInst & Inst)4110 unsigned X86AsmParser::checkTargetMatchPredicate(MCInst &Inst) {
4111 unsigned Opc = Inst.getOpcode();
4112 const MCInstrDesc &MCID = MII.get(Opc);
4113
4114 if (ForcedVEXEncoding == VEXEncoding_EVEX &&
4115 (MCID.TSFlags & X86II::EncodingMask) != X86II::EVEX)
4116 return Match_Unsupported;
4117
4118 if ((ForcedVEXEncoding == VEXEncoding_VEX ||
4119 ForcedVEXEncoding == VEXEncoding_VEX2 ||
4120 ForcedVEXEncoding == VEXEncoding_VEX3) &&
4121 (MCID.TSFlags & X86II::EncodingMask) != X86II::VEX)
4122 return Match_Unsupported;
4123
4124 // These instructions are only available with {vex}, {vex2} or {vex3} prefix
4125 if (MCID.TSFlags & X86II::ExplicitVEXPrefix &&
4126 (ForcedVEXEncoding != VEXEncoding_VEX &&
4127 ForcedVEXEncoding != VEXEncoding_VEX2 &&
4128 ForcedVEXEncoding != VEXEncoding_VEX3))
4129 return Match_Unsupported;
4130
4131 return Match_Success;
4132 }
4133
/// Match and emit an instruction in AT&T syntax. First tries a direct match;
/// if that fails, retries with each of the size suffixes appended to the
/// mnemonic (b/w/l/q, or s/l/t for x87), then produces the best diagnostic it
/// can from the collected results. Returns true on error.
bool X86AsmParser::MatchAndEmitATTInstruction(SMLoc IDLoc, unsigned &Opcode,
                                              OperandVector &Operands,
                                              MCStreamer &Out,
                                              uint64_t &ErrorInfo,
                                              bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  SMRange EmptyRange = std::nullopt;

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands,
                    Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);
  unsigned Prefixes = getPrefixes(Operands);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // In 16-bit mode, if data32 is specified, temporarily switch to 32-bit mode
  // when matching the instruction.
  if (ForcedDataPrefix == X86::Is32Bit)
    SwitchMode(X86::Is32Bit);
  // First, try a direct match.
  FeatureBitset MissingFeatures;
  unsigned OriginalError = MatchInstruction(Operands, Inst, ErrorInfo,
                                            MissingFeatures, MatchingInlineAsm,
                                            isParsingIntelSyntax());
  if (ForcedDataPrefix == X86::Is32Bit) {
    // Restore 16-bit mode and consume the one-shot data32 prefix.
    SwitchMode(X86::Is16Bit);
    ForcedDataPrefix = 0;
  }
  switch (OriginalError) {
  default: llvm_unreachable("Unexpected match result!");
  case Match_Success:
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  case Match_InvalidImmUnsignedi4: {
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }
  case Match_MissingFeature:
    return ErrorMissingFeature(IDLoc, MissingFeatures, MatchingInlineAsm);
  case Match_InvalidOperand:
  case Match_MnemonicFail:
  case Match_Unsupported:
    // Fall through to the suffix-retry logic below.
    break;
  }
  if (Op.getToken().empty()) {
    Error(IDLoc, "instruction must have size higher than 0", EmptyRange,
          MatchingInlineAsm);
    return true;
  }

  // FIXME: Ideally, we would only attempt suffix matches for things which are
  // valid prefixes, and we could just infer the right unambiguous
  // type. However, that requires substantially more matcher support than the
  // following hack.

  // Change the operand to point to a temporary token. The trailing space is
  // overwritten with each candidate suffix in the loop below.
  StringRef Base = Op.getToken();
  SmallString<16> Tmp;
  Tmp += Base;
  Tmp += ' ';
  Op.setTokenValue(Tmp);

  // If this instruction starts with an 'f', then it is a floating point stack
  // instruction.  These come in up to three forms for 32-bit, 64-bit, and
  // 80-bit floating point, which use the suffixes s,l,t respectively.
  //
  // Otherwise, we assume that this may be an integer instruction, which comes
  // in 8/16/32/64-bit forms using the b,w,l,q suffixes respectively.
  const char *Suffixes = Base[0] != 'f' ? "bwlq" : "slt\0";
  // MemSize corresponding to Suffixes.  { 8, 16, 32, 64 }    { 32, 64, 80, 0 }
  const char *MemSize = Base[0] != 'f' ? "\x08\x10\x20\x40" : "\x20\x40\x50\0";

  // Check for the various suffix matches.
  uint64_t ErrorInfoIgnore;
  FeatureBitset ErrorInfoMissingFeatures; // Init suppresses compiler warnings.
  unsigned Match[4];

  // Some instruction like VPMULDQ is NOT the variant of VPMULD but a new one.
  // So we should make sure the suffix matcher only works for memory variant
  // that has the same size with the suffix.
  // FIXME: This flag is a workaround for legacy instructions that didn't
  // declare non suffix variant assembly.
  bool HasVectorReg = false;
  X86Operand *MemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isVectorReg())
      HasVectorReg = true;
    else if (X86Op->isMem()) {
      MemOp = X86Op;
      assert(MemOp->Mem.Size == 0 && "Memory size always 0 under ATT syntax");
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Try each candidate suffix; record per-suffix match results so the error
  // reporting below can distinguish ambiguity from outright failure.
  for (unsigned I = 0, E = std::size(Match); I != E; ++I) {
    Tmp.back() = Suffixes[I];
    if (MemOp && HasVectorReg)
      MemOp->Mem.Size = MemSize[I];
    Match[I] = Match_MnemonicFail;
    if (MemOp || !HasVectorReg) {
      Match[I] =
          MatchInstruction(Operands, Inst, ErrorInfoIgnore, MissingFeatures,
                           MatchingInlineAsm, isParsingIntelSyntax());
      // If this returned as a missing feature failure, remember that.
      if (Match[I] == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }
  }

  // Restore the old token.
  Op.setTokenValue(Base);

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the
    // individual transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;

    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  }

  // Otherwise, the match failed, try to produce a decent error message.

  // If we had multiple suffix matches, then identify this as an ambiguous
  // match.
  if (NumSuccessfulMatches > 1) {
    char MatchChars[4];
    unsigned NumMatches = 0;
    for (unsigned I = 0, E = std::size(Match); I != E; ++I)
      if (Match[I] == Match_Success)
        MatchChars[NumMatches++] = Suffixes[I];

    SmallString<126> Msg;
    raw_svector_ostream OS(Msg);
    OS << "ambiguous instructions require an explicit suffix (could be ";
    for (unsigned i = 0; i != NumMatches; ++i) {
      if (i != 0)
        OS << ", ";
      if (i + 1 == NumMatches)
        OS << "or ";
      OS << "'" << Base << MatchChars[i] << "'";
    }
    OS << ")";
    Error(IDLoc, OS.str(), EmptyRange, MatchingInlineAsm);
    return true;
  }

  // Okay, we know that none of the variants matched successfully.

  // If all of the instructions reported an invalid mnemonic, then the original
  // mnemonic was invalid.
  if (llvm::count(Match, Match_MnemonicFail) == 4) {
    if (OriginalError == Match_MnemonicFail)
      return Error(IDLoc, "invalid instruction mnemonic '" + Base + "'",
                   Op.getLocRange(), MatchingInlineAsm);

    if (OriginalError == Match_Unsupported)
      return Error(IDLoc, "unsupported instruction", EmptyRange,
                   MatchingInlineAsm);

    assert(OriginalError == Match_InvalidOperand && "Unexpected error");
    // Recover location info for the operand if we know which was the problem.
    if (ErrorInfo != ~0ULL) {
      if (ErrorInfo >= Operands.size())
        return Error(IDLoc, "too few operands for instruction", EmptyRange,
                     MatchingInlineAsm);

      X86Operand &Operand = (X86Operand &)*Operands[ErrorInfo];
      if (Operand.getStartLoc().isValid()) {
        SMRange OperandRange = Operand.getLocRange();
        return Error(Operand.getStartLoc(), "invalid operand for instruction",
                     OperandRange, MatchingInlineAsm);
      }
    }

    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  Error(IDLoc, "unknown use of instruction mnemonic without a size suffix",
        EmptyRange, MatchingInlineAsm);
  return true;
}
4391
// Match one parsed Intel-syntax instruction against the generated matcher and
// emit it. Because Intel syntax does not encode operand size in the mnemonic,
// an unsized memory operand is retried at every legal size and the results are
// accumulated in `Match`; ambiguity/failure diagnostics are derived from that
// vector. Returns true on error (a diagnostic has been emitted), false on
// successful emission (Opcode is set to the selected opcode).
bool X86AsmParser::MatchAndEmitIntelInstruction(SMLoc IDLoc, unsigned &Opcode,
                                                OperandVector &Operands,
                                                MCStreamer &Out,
                                                uint64_t &ErrorInfo,
                                                bool MatchingInlineAsm) {
  assert(!Operands.empty() && "Unexpect empty operand list!");
  assert((*Operands[0]).isToken() && "Leading operand should always be a mnemonic!");
  StringRef Mnemonic = (static_cast<X86Operand &>(*Operands[0])).getToken();
  SMRange EmptyRange = std::nullopt;
  StringRef Base = (static_cast<X86Operand &>(*Operands[0])).getToken();
  unsigned Prefixes = getPrefixes(Operands);

  // First, handle aliases that expand to multiple instructions.
  MatchFPUWaitAlias(IDLoc, static_cast<X86Operand &>(*Operands[0]), Operands, Out, MatchingInlineAsm);
  X86Operand &Op = static_cast<X86Operand &>(*Operands[0]);

  MCInst Inst;

  // If VEX/EVEX encoding is forced, we need to pass the USE_* flag to the
  // encoder and printer.
  if (ForcedVEXEncoding == VEXEncoding_VEX)
    Prefixes |= X86::IP_USE_VEX;
  else if (ForcedVEXEncoding == VEXEncoding_VEX2)
    Prefixes |= X86::IP_USE_VEX2;
  else if (ForcedVEXEncoding == VEXEncoding_VEX3)
    Prefixes |= X86::IP_USE_VEX3;
  else if (ForcedVEXEncoding == VEXEncoding_EVEX)
    Prefixes |= X86::IP_USE_EVEX;

  // Set encoded flags for {disp8} and {disp32}.
  if (ForcedDispEncoding == DispEncoding_Disp8)
    Prefixes |= X86::IP_USE_DISP8;
  else if (ForcedDispEncoding == DispEncoding_Disp32)
    Prefixes |= X86::IP_USE_DISP32;

  if (Prefixes)
    Inst.setFlags(Prefixes);

  // Find one unsized memory operand, if present.
  X86Operand *UnsizedMemOp = nullptr;
  for (const auto &Op : Operands) {
    X86Operand *X86Op = static_cast<X86Operand *>(Op.get());
    if (X86Op->isMemUnsized()) {
      UnsizedMemOp = X86Op;
      // Have we found an unqualified memory operand,
      // break. IA allows only one memory operand.
      break;
    }
  }

  // Allow some instructions to have implicitly pointer-sized operands. This is
  // compatible with gas.
  if (UnsizedMemOp) {
    static const char *const PtrSizedInstrs[] = {"call", "jmp", "push"};
    for (const char *Instr : PtrSizedInstrs) {
      if (Mnemonic == Instr) {
        UnsizedMemOp->Mem.Size = getPointerWidth();
        break;
      }
    }
  }

  // One entry per match attempt below; consulted afterwards to classify
  // success, ambiguity, or the kind of failure to report.
  SmallVector<unsigned, 8> Match;
  FeatureBitset ErrorInfoMissingFeatures;
  FeatureBitset MissingFeatures;

  // If unsized push has immediate operand we should default the default pointer
  // size for the size.
  if (Mnemonic == "push" && Operands.size() == 2) {
    auto *X86Op = static_cast<X86Operand *>(Operands[1].get());
    if (X86Op->isImm()) {
      // If it's not a constant fall through and let remainder take care of it.
      const auto *CE = dyn_cast<MCConstantExpr>(X86Op->getImm());
      unsigned Size = getPointerWidth();
      if (CE &&
          (isIntN(Size, CE->getValue()) || isUIntN(Size, CE->getValue()))) {
        SmallString<16> Tmp;
        Tmp += Base;
        // Append the mode-appropriate ATT size suffix (q/l/w) to the mnemonic.
        Tmp += (is64BitMode())
                   ? "q"
                   : (is32BitMode()) ? "l" : (is16BitMode()) ? "w" : " ";
        Op.setTokenValue(Tmp);
        // Do match in ATT mode to allow explicit suffix usage.
        Match.push_back(MatchInstruction(Operands, Inst, ErrorInfo,
                                         MissingFeatures, MatchingInlineAsm,
                                         false /*isParsingIntelSyntax()*/));
        // Restore the original (suffix-less) mnemonic token.
        Op.setTokenValue(Base);
      }
    }
  }

  // If an unsized memory operand is present, try to match with each memory
  // operand size. In Intel assembly, the size is not part of the instruction
  // mnemonic.
  if (UnsizedMemOp && UnsizedMemOp->isMemUnsized()) {
    static const unsigned MopSizes[] = {8, 16, 32, 64, 80, 128, 256, 512};
    for (unsigned Size : MopSizes) {
      UnsizedMemOp->Mem.Size = Size;
      uint64_t ErrorInfoIgnore;
      unsigned LastOpcode = Inst.getOpcode();
      unsigned M = MatchInstruction(Operands, Inst, ErrorInfoIgnore,
                                    MissingFeatures, MatchingInlineAsm,
                                    isParsingIntelSyntax());
      // Only record a result the first time, or when this size selected a
      // different opcode than the previous attempt.
      if (Match.empty() || LastOpcode != Inst.getOpcode())
        Match.push_back(M);

      // If this returned as a missing feature failure, remember that.
      if (Match.back() == Match_MissingFeature)
        ErrorInfoMissingFeatures = MissingFeatures;
    }

    // Restore the size of the unsized memory operand if we modified it.
    UnsizedMemOp->Mem.Size = 0;
  }

  // If we haven't matched anything yet, this is not a basic integer or FPU
  // operation. There shouldn't be any ambiguity in our mnemonic table, so try
  // matching with the unsized operand.
  if (Match.empty()) {
    Match.push_back(MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax()));
    // If this returned as a missing feature failure, remember that.
    if (Match.back() == Match_MissingFeature)
      ErrorInfoMissingFeatures = MissingFeatures;
  }

  // Restore the size of the unsized memory operand if we modified it.
  if (UnsizedMemOp)
    UnsizedMemOp->Mem.Size = 0;

  // If it's a bad mnemonic, all results will be the same.
  if (Match.back() == Match_MnemonicFail) {
    return Error(IDLoc, "invalid instruction mnemonic '" + Mnemonic + "'",
                 Op.getLocRange(), MatchingInlineAsm);
  }

  unsigned NumSuccessfulMatches = llvm::count(Match, Match_Success);

  // If matching was ambiguous and we had size information from the frontend,
  // try again with that. This handles cases like "movxz eax, m8/m16".
  if (UnsizedMemOp && NumSuccessfulMatches > 1 &&
      UnsizedMemOp->getMemFrontendSize()) {
    UnsizedMemOp->Mem.Size = UnsizedMemOp->getMemFrontendSize();
    unsigned M = MatchInstruction(
        Operands, Inst, ErrorInfo, MissingFeatures, MatchingInlineAsm,
        isParsingIntelSyntax());
    if (M == Match_Success)
      NumSuccessfulMatches = 1;

    // Add a rewrite that encodes the size information we used from the
    // frontend.
    InstInfo->AsmRewrites->emplace_back(
        AOK_SizeDirective, UnsizedMemOp->getStartLoc(),
        /*Len=*/0, UnsizedMemOp->getMemFrontendSize());
  }

  // If exactly one matched, then we treat that as a successful match (and the
  // instruction will already have been filled in correctly, since the failing
  // matches won't have modified it).
  if (NumSuccessfulMatches == 1) {
    if (!MatchingInlineAsm && validateInstruction(Inst, Operands))
      return true;
    // Some instructions need post-processing to, for example, tweak which
    // encoding is selected. Loop on it while changes happen so the individual
    // transformations can chain off each other.
    if (!MatchingInlineAsm)
      while (processInstruction(Inst, Operands))
        ;
    Inst.setLoc(IDLoc);
    if (!MatchingInlineAsm)
      emitInstruction(Inst, Operands, Out);
    Opcode = Inst.getOpcode();
    return false;
  } else if (NumSuccessfulMatches > 1) {
    assert(UnsizedMemOp &&
           "multiple matches only possible with unsized memory operands");
    return Error(UnsizedMemOp->getStartLoc(),
                 "ambiguous operand size for instruction '" + Mnemonic + "\'",
                 UnsizedMemOp->getLocRange());
  }

  // If one instruction matched as unsupported, report this as unsupported.
  if (llvm::count(Match, Match_Unsupported) == 1) {
    return Error(IDLoc, "unsupported instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  // If one instruction matched with a missing feature, report this as a
  // missing feature.
  if (llvm::count(Match, Match_MissingFeature) == 1) {
    ErrorInfo = Match_MissingFeature;
    return ErrorMissingFeature(IDLoc, ErrorInfoMissingFeatures,
                               MatchingInlineAsm);
  }

  // If one instruction matched with an invalid operand, report this as an
  // operand failure.
  if (llvm::count(Match, Match_InvalidOperand) == 1) {
    return Error(IDLoc, "invalid operand for instruction", EmptyRange,
                 MatchingInlineAsm);
  }

  if (llvm::count(Match, Match_InvalidImmUnsignedi4) == 1) {
    // NOTE(review): ErrorInfo is assumed to index a valid operand here
    // (no bounds check, unlike the ATT path) — presumably guaranteed by the
    // matcher for this error kind; confirm before relying on it.
    SMLoc ErrorLoc = ((X86Operand &)*Operands[ErrorInfo]).getStartLoc();
    if (ErrorLoc == SMLoc())
      ErrorLoc = IDLoc;
    return Error(ErrorLoc, "immediate must be an integer in range [0, 15]",
                 EmptyRange, MatchingInlineAsm);
  }

  // If all of these were an outright failure, report it in a useless way.
  return Error(IDLoc, "unknown instruction mnemonic", EmptyRange,
               MatchingInlineAsm);
}
4607
OmitRegisterFromClobberLists(unsigned RegNo)4608 bool X86AsmParser::OmitRegisterFromClobberLists(unsigned RegNo) {
4609 return X86MCRegisterClasses[X86::SEGMENT_REGRegClassID].contains(RegNo);
4610 }
4611
// Top-level dispatch for X86-specific assembler directives. Returns false when
// the directive was recognized and handled (or a diagnostic was emitted for
// its operands), true when the directive is not one of ours and the generic
// parser should handle it.
bool X86AsmParser::ParseDirective(AsmToken DirectiveID) {
  MCAsmParser &Parser = getParser();
  StringRef IDVal = DirectiveID.getIdentifier();
  // Prefix matches: ".arch" swallows its arguments, ".code" covers
  // .code16/.code16gcc/.code32/.code64.
  if (IDVal.startswith(".arch"))
    return parseDirectiveArch();
  if (IDVal.startswith(".code"))
    return ParseDirectiveCode(IDVal, DirectiveID.getLoc());
  else if (IDVal.startswith(".att_syntax")) {
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      // "prefix" is the default and is accepted; "noprefix" is rejected.
      if (Parser.getTok().getString() == "prefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "noprefix")
        return Error(DirectiveID.getLoc(), "'.att_syntax noprefix' is not "
                                           "supported: registers must have a "
                                           "'%' prefix in .att_syntax");
    }
    getParser().setAssemblerDialect(0);
    return false;
  } else if (IDVal.startswith(".intel_syntax")) {
    getParser().setAssemblerDialect(1);
    if (getLexer().isNot(AsmToken::EndOfStatement)) {
      // Mirror of the .att_syntax handling: "noprefix" is the default here.
      if (Parser.getTok().getString() == "noprefix")
        Parser.Lex();
      else if (Parser.getTok().getString() == "prefix")
        return Error(DirectiveID.getLoc(), "'.intel_syntax prefix' is not "
                                           "supported: registers must not have "
                                           "a '%' prefix in .intel_syntax");
    }
    return false;
  } else if (IDVal == ".nops")
    return parseDirectiveNops(DirectiveID.getLoc());
  else if (IDVal == ".even")
    return parseDirectiveEven(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_proc")
    return parseDirectiveFPOProc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_setframe")
    return parseDirectiveFPOSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_pushreg")
    return parseDirectiveFPOPushReg(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalloc")
    return parseDirectiveFPOStackAlloc(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_stackalign")
    return parseDirectiveFPOStackAlign(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endprologue")
    return parseDirectiveFPOEndPrologue(DirectiveID.getLoc());
  else if (IDVal == ".cv_fpo_endproc")
    return parseDirectiveFPOEndProc(DirectiveID.getLoc());
  // MASM spells the SEH directives without the "seh_" prefix and is
  // case-insensitive about them.
  else if (IDVal == ".seh_pushreg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushreg")))
    return parseDirectiveSEHPushReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_setframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".setframe")))
    return parseDirectiveSEHSetFrame(DirectiveID.getLoc());
  else if (IDVal == ".seh_savereg" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savereg")))
    return parseDirectiveSEHSaveReg(DirectiveID.getLoc());
  else if (IDVal == ".seh_savexmm" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".savexmm128")))
    return parseDirectiveSEHSaveXMM(DirectiveID.getLoc());
  else if (IDVal == ".seh_pushframe" ||
           (Parser.isParsingMasm() && IDVal.equals_insensitive(".pushframe")))
    return parseDirectiveSEHPushFrame(DirectiveID.getLoc());

  // Not an X86 directive.
  return true;
}
4677
parseDirectiveArch()4678 bool X86AsmParser::parseDirectiveArch() {
4679 // Ignore .arch for now.
4680 getParser().parseStringToEndOfStatement();
4681 return false;
4682 }
4683
4684 /// parseDirectiveNops
4685 /// ::= .nops size[, control]
parseDirectiveNops(SMLoc L)4686 bool X86AsmParser::parseDirectiveNops(SMLoc L) {
4687 int64_t NumBytes = 0, Control = 0;
4688 SMLoc NumBytesLoc, ControlLoc;
4689 const MCSubtargetInfo& STI = getSTI();
4690 NumBytesLoc = getTok().getLoc();
4691 if (getParser().checkForValidSection() ||
4692 getParser().parseAbsoluteExpression(NumBytes))
4693 return true;
4694
4695 if (parseOptionalToken(AsmToken::Comma)) {
4696 ControlLoc = getTok().getLoc();
4697 if (getParser().parseAbsoluteExpression(Control))
4698 return true;
4699 }
4700 if (getParser().parseEOL())
4701 return true;
4702
4703 if (NumBytes <= 0) {
4704 Error(NumBytesLoc, "'.nops' directive with non-positive size");
4705 return false;
4706 }
4707
4708 if (Control < 0) {
4709 Error(ControlLoc, "'.nops' directive with negative NOP size");
4710 return false;
4711 }
4712
4713 /// Emit nops
4714 getParser().getStreamer().emitNops(NumBytes, Control, L, STI);
4715
4716 return false;
4717 }
4718
4719 /// parseDirectiveEven
4720 /// ::= .even
parseDirectiveEven(SMLoc L)4721 bool X86AsmParser::parseDirectiveEven(SMLoc L) {
4722 if (parseEOL())
4723 return false;
4724
4725 const MCSection *Section = getStreamer().getCurrentSectionOnly();
4726 if (!Section) {
4727 getStreamer().initSections(false, getSTI());
4728 Section = getStreamer().getCurrentSectionOnly();
4729 }
4730 if (Section->useCodeAlign())
4731 getStreamer().emitCodeAlignment(Align(2), &getSTI(), 0);
4732 else
4733 getStreamer().emitValueToAlignment(Align(2), 0, 1, 0);
4734 return false;
4735 }
4736
4737 /// ParseDirectiveCode
4738 /// ::= .code16 | .code32 | .code64
ParseDirectiveCode(StringRef IDVal,SMLoc L)4739 bool X86AsmParser::ParseDirectiveCode(StringRef IDVal, SMLoc L) {
4740 MCAsmParser &Parser = getParser();
4741 Code16GCC = false;
4742 if (IDVal == ".code16") {
4743 Parser.Lex();
4744 if (!is16BitMode()) {
4745 SwitchMode(X86::Is16Bit);
4746 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4747 }
4748 } else if (IDVal == ".code16gcc") {
4749 // .code16gcc parses as if in 32-bit mode, but emits code in 16-bit mode.
4750 Parser.Lex();
4751 Code16GCC = true;
4752 if (!is16BitMode()) {
4753 SwitchMode(X86::Is16Bit);
4754 getParser().getStreamer().emitAssemblerFlag(MCAF_Code16);
4755 }
4756 } else if (IDVal == ".code32") {
4757 Parser.Lex();
4758 if (!is32BitMode()) {
4759 SwitchMode(X86::Is32Bit);
4760 getParser().getStreamer().emitAssemblerFlag(MCAF_Code32);
4761 }
4762 } else if (IDVal == ".code64") {
4763 Parser.Lex();
4764 if (!is64BitMode()) {
4765 SwitchMode(X86::Is64Bit);
4766 getParser().getStreamer().emitAssemblerFlag(MCAF_Code64);
4767 }
4768 } else {
4769 Error(L, "unknown directive " + IDVal);
4770 return false;
4771 }
4772
4773 return false;
4774 }
4775
4776 // .cv_fpo_proc foo
parseDirectiveFPOProc(SMLoc L)4777 bool X86AsmParser::parseDirectiveFPOProc(SMLoc L) {
4778 MCAsmParser &Parser = getParser();
4779 StringRef ProcName;
4780 int64_t ParamsSize;
4781 if (Parser.parseIdentifier(ProcName))
4782 return Parser.TokError("expected symbol name");
4783 if (Parser.parseIntToken(ParamsSize, "expected parameter byte count"))
4784 return true;
4785 if (!isUIntN(32, ParamsSize))
4786 return Parser.TokError("parameters size out of range");
4787 if (parseEOL())
4788 return true;
4789 MCSymbol *ProcSym = getContext().getOrCreateSymbol(ProcName);
4790 return getTargetStreamer().emitFPOProc(ProcSym, ParamsSize, L);
4791 }
4792
4793 // .cv_fpo_setframe ebp
parseDirectiveFPOSetFrame(SMLoc L)4794 bool X86AsmParser::parseDirectiveFPOSetFrame(SMLoc L) {
4795 MCRegister Reg;
4796 SMLoc DummyLoc;
4797 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4798 return true;
4799 return getTargetStreamer().emitFPOSetFrame(Reg, L);
4800 }
4801
4802 // .cv_fpo_pushreg ebx
parseDirectiveFPOPushReg(SMLoc L)4803 bool X86AsmParser::parseDirectiveFPOPushReg(SMLoc L) {
4804 MCRegister Reg;
4805 SMLoc DummyLoc;
4806 if (parseRegister(Reg, DummyLoc, DummyLoc) || parseEOL())
4807 return true;
4808 return getTargetStreamer().emitFPOPushReg(Reg, L);
4809 }
4810
4811 // .cv_fpo_stackalloc 20
parseDirectiveFPOStackAlloc(SMLoc L)4812 bool X86AsmParser::parseDirectiveFPOStackAlloc(SMLoc L) {
4813 MCAsmParser &Parser = getParser();
4814 int64_t Offset;
4815 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4816 return true;
4817 return getTargetStreamer().emitFPOStackAlloc(Offset, L);
4818 }
4819
4820 // .cv_fpo_stackalign 8
parseDirectiveFPOStackAlign(SMLoc L)4821 bool X86AsmParser::parseDirectiveFPOStackAlign(SMLoc L) {
4822 MCAsmParser &Parser = getParser();
4823 int64_t Offset;
4824 if (Parser.parseIntToken(Offset, "expected offset") || parseEOL())
4825 return true;
4826 return getTargetStreamer().emitFPOStackAlign(Offset, L);
4827 }
4828
4829 // .cv_fpo_endprologue
parseDirectiveFPOEndPrologue(SMLoc L)4830 bool X86AsmParser::parseDirectiveFPOEndPrologue(SMLoc L) {
4831 MCAsmParser &Parser = getParser();
4832 if (Parser.parseEOL())
4833 return true;
4834 return getTargetStreamer().emitFPOEndPrologue(L);
4835 }
4836
4837 // .cv_fpo_endproc
parseDirectiveFPOEndProc(SMLoc L)4838 bool X86AsmParser::parseDirectiveFPOEndProc(SMLoc L) {
4839 MCAsmParser &Parser = getParser();
4840 if (Parser.parseEOL())
4841 return true;
4842 return getTargetStreamer().emitFPOEndProc(L);
4843 }
4844
parseSEHRegisterNumber(unsigned RegClassID,MCRegister & RegNo)4845 bool X86AsmParser::parseSEHRegisterNumber(unsigned RegClassID,
4846 MCRegister &RegNo) {
4847 SMLoc startLoc = getLexer().getLoc();
4848 const MCRegisterInfo *MRI = getContext().getRegisterInfo();
4849
4850 // Try parsing the argument as a register first.
4851 if (getLexer().getTok().isNot(AsmToken::Integer)) {
4852 SMLoc endLoc;
4853 if (parseRegister(RegNo, startLoc, endLoc))
4854 return true;
4855
4856 if (!X86MCRegisterClasses[RegClassID].contains(RegNo)) {
4857 return Error(startLoc,
4858 "register is not supported for use with this directive");
4859 }
4860 } else {
4861 // Otherwise, an integer number matching the encoding of the desired
4862 // register may appear.
4863 int64_t EncodedReg;
4864 if (getParser().parseAbsoluteExpression(EncodedReg))
4865 return true;
4866
4867 // The SEH register number is the same as the encoding register number. Map
4868 // from the encoding back to the LLVM register number.
4869 RegNo = 0;
4870 for (MCPhysReg Reg : X86MCRegisterClasses[RegClassID]) {
4871 if (MRI->getEncodingValue(Reg) == EncodedReg) {
4872 RegNo = Reg;
4873 break;
4874 }
4875 }
4876 if (RegNo == 0) {
4877 return Error(startLoc,
4878 "incorrect register number for use with this directive");
4879 }
4880 }
4881
4882 return false;
4883 }
4884
parseDirectiveSEHPushReg(SMLoc Loc)4885 bool X86AsmParser::parseDirectiveSEHPushReg(SMLoc Loc) {
4886 MCRegister Reg;
4887 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4888 return true;
4889
4890 if (getLexer().isNot(AsmToken::EndOfStatement))
4891 return TokError("expected end of directive");
4892
4893 getParser().Lex();
4894 getStreamer().emitWinCFIPushReg(Reg, Loc);
4895 return false;
4896 }
4897
parseDirectiveSEHSetFrame(SMLoc Loc)4898 bool X86AsmParser::parseDirectiveSEHSetFrame(SMLoc Loc) {
4899 MCRegister Reg;
4900 int64_t Off;
4901 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4902 return true;
4903 if (getLexer().isNot(AsmToken::Comma))
4904 return TokError("you must specify a stack pointer offset");
4905
4906 getParser().Lex();
4907 if (getParser().parseAbsoluteExpression(Off))
4908 return true;
4909
4910 if (getLexer().isNot(AsmToken::EndOfStatement))
4911 return TokError("expected end of directive");
4912
4913 getParser().Lex();
4914 getStreamer().emitWinCFISetFrame(Reg, Off, Loc);
4915 return false;
4916 }
4917
parseDirectiveSEHSaveReg(SMLoc Loc)4918 bool X86AsmParser::parseDirectiveSEHSaveReg(SMLoc Loc) {
4919 MCRegister Reg;
4920 int64_t Off;
4921 if (parseSEHRegisterNumber(X86::GR64RegClassID, Reg))
4922 return true;
4923 if (getLexer().isNot(AsmToken::Comma))
4924 return TokError("you must specify an offset on the stack");
4925
4926 getParser().Lex();
4927 if (getParser().parseAbsoluteExpression(Off))
4928 return true;
4929
4930 if (getLexer().isNot(AsmToken::EndOfStatement))
4931 return TokError("expected end of directive");
4932
4933 getParser().Lex();
4934 getStreamer().emitWinCFISaveReg(Reg, Off, Loc);
4935 return false;
4936 }
4937
parseDirectiveSEHSaveXMM(SMLoc Loc)4938 bool X86AsmParser::parseDirectiveSEHSaveXMM(SMLoc Loc) {
4939 MCRegister Reg;
4940 int64_t Off;
4941 if (parseSEHRegisterNumber(X86::VR128XRegClassID, Reg))
4942 return true;
4943 if (getLexer().isNot(AsmToken::Comma))
4944 return TokError("you must specify an offset on the stack");
4945
4946 getParser().Lex();
4947 if (getParser().parseAbsoluteExpression(Off))
4948 return true;
4949
4950 if (getLexer().isNot(AsmToken::EndOfStatement))
4951 return TokError("expected end of directive");
4952
4953 getParser().Lex();
4954 getStreamer().emitWinCFISaveXMM(Reg, Off, Loc);
4955 return false;
4956 }
4957
parseDirectiveSEHPushFrame(SMLoc Loc)4958 bool X86AsmParser::parseDirectiveSEHPushFrame(SMLoc Loc) {
4959 bool Code = false;
4960 StringRef CodeID;
4961 if (getLexer().is(AsmToken::At)) {
4962 SMLoc startLoc = getLexer().getLoc();
4963 getParser().Lex();
4964 if (!getParser().parseIdentifier(CodeID)) {
4965 if (CodeID != "code")
4966 return Error(startLoc, "expected @code");
4967 Code = true;
4968 }
4969 }
4970
4971 if (getLexer().isNot(AsmToken::EndOfStatement))
4972 return TokError("expected end of directive");
4973
4974 getParser().Lex();
4975 getStreamer().emitWinCFIPushFrame(Code, Loc);
4976 return false;
4977 }
4978
4979 // Force static initialization.
LLVMInitializeX86AsmParser()4980 extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeX86AsmParser() {
4981 RegisterMCAsmParser<X86AsmParser> X(getTheX86_32Target());
4982 RegisterMCAsmParser<X86AsmParser> Y(getTheX86_64Target());
4983 }
4984
4985 #define GET_REGISTER_MATCHER
4986 #define GET_MATCHER_IMPLEMENTATION
4987 #define GET_SUBTARGET_FEATURE_NAME
4988 #include "X86GenAsmMatcher.inc"
4989