1*67e74705SXin Li //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li
10*67e74705SXin Li #include "clang/AST/CommentParser.h"
11*67e74705SXin Li #include "clang/AST/CommentCommandTraits.h"
12*67e74705SXin Li #include "clang/AST/CommentDiagnostic.h"
13*67e74705SXin Li #include "clang/AST/CommentSema.h"
14*67e74705SXin Li #include "clang/Basic/CharInfo.h"
15*67e74705SXin Li #include "clang/Basic/SourceManager.h"
16*67e74705SXin Li #include "llvm/Support/ErrorHandling.h"
17*67e74705SXin Li
18*67e74705SXin Li namespace clang {
19*67e74705SXin Li
isWhitespace(llvm::StringRef S)20*67e74705SXin Li static inline bool isWhitespace(llvm::StringRef S) {
21*67e74705SXin Li for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
22*67e74705SXin Li if (!isWhitespace(*I))
23*67e74705SXin Li return false;
24*67e74705SXin Li }
25*67e74705SXin Li return true;
26*67e74705SXin Li }
27*67e74705SXin Li
28*67e74705SXin Li namespace comments {
29*67e74705SXin Li
30*67e74705SXin Li /// Re-lexes a sequence of tok::text tokens.
31*67e74705SXin Li class TextTokenRetokenizer {
32*67e74705SXin Li llvm::BumpPtrAllocator &Allocator;
33*67e74705SXin Li Parser &P;
34*67e74705SXin Li
35*67e74705SXin Li /// This flag is set when there are no more tokens we can fetch from lexer.
36*67e74705SXin Li bool NoMoreInterestingTokens;
37*67e74705SXin Li
38*67e74705SXin Li /// Token buffer: tokens we have processed and lookahead.
39*67e74705SXin Li SmallVector<Token, 16> Toks;
40*67e74705SXin Li
41*67e74705SXin Li /// A position in \c Toks.
42*67e74705SXin Li struct Position {
43*67e74705SXin Li unsigned CurToken;
44*67e74705SXin Li const char *BufferStart;
45*67e74705SXin Li const char *BufferEnd;
46*67e74705SXin Li const char *BufferPtr;
47*67e74705SXin Li SourceLocation BufferStartLoc;
48*67e74705SXin Li };
49*67e74705SXin Li
50*67e74705SXin Li /// Current position in Toks.
51*67e74705SXin Li Position Pos;
52*67e74705SXin Li
isEnd() const53*67e74705SXin Li bool isEnd() const {
54*67e74705SXin Li return Pos.CurToken >= Toks.size();
55*67e74705SXin Li }
56*67e74705SXin Li
57*67e74705SXin Li /// Sets up the buffer pointers to point to current token.
setupBuffer()58*67e74705SXin Li void setupBuffer() {
59*67e74705SXin Li assert(!isEnd());
60*67e74705SXin Li const Token &Tok = Toks[Pos.CurToken];
61*67e74705SXin Li
62*67e74705SXin Li Pos.BufferStart = Tok.getText().begin();
63*67e74705SXin Li Pos.BufferEnd = Tok.getText().end();
64*67e74705SXin Li Pos.BufferPtr = Pos.BufferStart;
65*67e74705SXin Li Pos.BufferStartLoc = Tok.getLocation();
66*67e74705SXin Li }
67*67e74705SXin Li
getSourceLocation() const68*67e74705SXin Li SourceLocation getSourceLocation() const {
69*67e74705SXin Li const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
70*67e74705SXin Li return Pos.BufferStartLoc.getLocWithOffset(CharNo);
71*67e74705SXin Li }
72*67e74705SXin Li
peek() const73*67e74705SXin Li char peek() const {
74*67e74705SXin Li assert(!isEnd());
75*67e74705SXin Li assert(Pos.BufferPtr != Pos.BufferEnd);
76*67e74705SXin Li return *Pos.BufferPtr;
77*67e74705SXin Li }
78*67e74705SXin Li
consumeChar()79*67e74705SXin Li void consumeChar() {
80*67e74705SXin Li assert(!isEnd());
81*67e74705SXin Li assert(Pos.BufferPtr != Pos.BufferEnd);
82*67e74705SXin Li Pos.BufferPtr++;
83*67e74705SXin Li if (Pos.BufferPtr == Pos.BufferEnd) {
84*67e74705SXin Li Pos.CurToken++;
85*67e74705SXin Li if (isEnd() && !addToken())
86*67e74705SXin Li return;
87*67e74705SXin Li
88*67e74705SXin Li assert(!isEnd());
89*67e74705SXin Li setupBuffer();
90*67e74705SXin Li }
91*67e74705SXin Li }
92*67e74705SXin Li
93*67e74705SXin Li /// Add a token.
94*67e74705SXin Li /// Returns true on success, false if there are no interesting tokens to
95*67e74705SXin Li /// fetch from lexer.
addToken()96*67e74705SXin Li bool addToken() {
97*67e74705SXin Li if (NoMoreInterestingTokens)
98*67e74705SXin Li return false;
99*67e74705SXin Li
100*67e74705SXin Li if (P.Tok.is(tok::newline)) {
101*67e74705SXin Li // If we see a single newline token between text tokens, skip it.
102*67e74705SXin Li Token Newline = P.Tok;
103*67e74705SXin Li P.consumeToken();
104*67e74705SXin Li if (P.Tok.isNot(tok::text)) {
105*67e74705SXin Li P.putBack(Newline);
106*67e74705SXin Li NoMoreInterestingTokens = true;
107*67e74705SXin Li return false;
108*67e74705SXin Li }
109*67e74705SXin Li }
110*67e74705SXin Li if (P.Tok.isNot(tok::text)) {
111*67e74705SXin Li NoMoreInterestingTokens = true;
112*67e74705SXin Li return false;
113*67e74705SXin Li }
114*67e74705SXin Li
115*67e74705SXin Li Toks.push_back(P.Tok);
116*67e74705SXin Li P.consumeToken();
117*67e74705SXin Li if (Toks.size() == 1)
118*67e74705SXin Li setupBuffer();
119*67e74705SXin Li return true;
120*67e74705SXin Li }
121*67e74705SXin Li
consumeWhitespace()122*67e74705SXin Li void consumeWhitespace() {
123*67e74705SXin Li while (!isEnd()) {
124*67e74705SXin Li if (isWhitespace(peek()))
125*67e74705SXin Li consumeChar();
126*67e74705SXin Li else
127*67e74705SXin Li break;
128*67e74705SXin Li }
129*67e74705SXin Li }
130*67e74705SXin Li
formTokenWithChars(Token & Result,SourceLocation Loc,const char * TokBegin,unsigned TokLength,StringRef Text)131*67e74705SXin Li void formTokenWithChars(Token &Result,
132*67e74705SXin Li SourceLocation Loc,
133*67e74705SXin Li const char *TokBegin,
134*67e74705SXin Li unsigned TokLength,
135*67e74705SXin Li StringRef Text) {
136*67e74705SXin Li Result.setLocation(Loc);
137*67e74705SXin Li Result.setKind(tok::text);
138*67e74705SXin Li Result.setLength(TokLength);
139*67e74705SXin Li #ifndef NDEBUG
140*67e74705SXin Li Result.TextPtr = "<UNSET>";
141*67e74705SXin Li Result.IntVal = 7;
142*67e74705SXin Li #endif
143*67e74705SXin Li Result.setText(Text);
144*67e74705SXin Li }
145*67e74705SXin Li
146*67e74705SXin Li public:
TextTokenRetokenizer(llvm::BumpPtrAllocator & Allocator,Parser & P)147*67e74705SXin Li TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
148*67e74705SXin Li Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
149*67e74705SXin Li Pos.CurToken = 0;
150*67e74705SXin Li addToken();
151*67e74705SXin Li }
152*67e74705SXin Li
153*67e74705SXin Li /// Extract a word -- sequence of non-whitespace characters.
lexWord(Token & Tok)154*67e74705SXin Li bool lexWord(Token &Tok) {
155*67e74705SXin Li if (isEnd())
156*67e74705SXin Li return false;
157*67e74705SXin Li
158*67e74705SXin Li Position SavedPos = Pos;
159*67e74705SXin Li
160*67e74705SXin Li consumeWhitespace();
161*67e74705SXin Li SmallString<32> WordText;
162*67e74705SXin Li const char *WordBegin = Pos.BufferPtr;
163*67e74705SXin Li SourceLocation Loc = getSourceLocation();
164*67e74705SXin Li while (!isEnd()) {
165*67e74705SXin Li const char C = peek();
166*67e74705SXin Li if (!isWhitespace(C)) {
167*67e74705SXin Li WordText.push_back(C);
168*67e74705SXin Li consumeChar();
169*67e74705SXin Li } else
170*67e74705SXin Li break;
171*67e74705SXin Li }
172*67e74705SXin Li const unsigned Length = WordText.size();
173*67e74705SXin Li if (Length == 0) {
174*67e74705SXin Li Pos = SavedPos;
175*67e74705SXin Li return false;
176*67e74705SXin Li }
177*67e74705SXin Li
178*67e74705SXin Li char *TextPtr = Allocator.Allocate<char>(Length + 1);
179*67e74705SXin Li
180*67e74705SXin Li memcpy(TextPtr, WordText.c_str(), Length + 1);
181*67e74705SXin Li StringRef Text = StringRef(TextPtr, Length);
182*67e74705SXin Li
183*67e74705SXin Li formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
184*67e74705SXin Li return true;
185*67e74705SXin Li }
186*67e74705SXin Li
lexDelimitedSeq(Token & Tok,char OpenDelim,char CloseDelim)187*67e74705SXin Li bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
188*67e74705SXin Li if (isEnd())
189*67e74705SXin Li return false;
190*67e74705SXin Li
191*67e74705SXin Li Position SavedPos = Pos;
192*67e74705SXin Li
193*67e74705SXin Li consumeWhitespace();
194*67e74705SXin Li SmallString<32> WordText;
195*67e74705SXin Li const char *WordBegin = Pos.BufferPtr;
196*67e74705SXin Li SourceLocation Loc = getSourceLocation();
197*67e74705SXin Li bool Error = false;
198*67e74705SXin Li if (!isEnd()) {
199*67e74705SXin Li const char C = peek();
200*67e74705SXin Li if (C == OpenDelim) {
201*67e74705SXin Li WordText.push_back(C);
202*67e74705SXin Li consumeChar();
203*67e74705SXin Li } else
204*67e74705SXin Li Error = true;
205*67e74705SXin Li }
206*67e74705SXin Li char C = '\0';
207*67e74705SXin Li while (!Error && !isEnd()) {
208*67e74705SXin Li C = peek();
209*67e74705SXin Li WordText.push_back(C);
210*67e74705SXin Li consumeChar();
211*67e74705SXin Li if (C == CloseDelim)
212*67e74705SXin Li break;
213*67e74705SXin Li }
214*67e74705SXin Li if (!Error && C != CloseDelim)
215*67e74705SXin Li Error = true;
216*67e74705SXin Li
217*67e74705SXin Li if (Error) {
218*67e74705SXin Li Pos = SavedPos;
219*67e74705SXin Li return false;
220*67e74705SXin Li }
221*67e74705SXin Li
222*67e74705SXin Li const unsigned Length = WordText.size();
223*67e74705SXin Li char *TextPtr = Allocator.Allocate<char>(Length + 1);
224*67e74705SXin Li
225*67e74705SXin Li memcpy(TextPtr, WordText.c_str(), Length + 1);
226*67e74705SXin Li StringRef Text = StringRef(TextPtr, Length);
227*67e74705SXin Li
228*67e74705SXin Li formTokenWithChars(Tok, Loc, WordBegin,
229*67e74705SXin Li Pos.BufferPtr - WordBegin, Text);
230*67e74705SXin Li return true;
231*67e74705SXin Li }
232*67e74705SXin Li
233*67e74705SXin Li /// Put back tokens that we didn't consume.
putBackLeftoverTokens()234*67e74705SXin Li void putBackLeftoverTokens() {
235*67e74705SXin Li if (isEnd())
236*67e74705SXin Li return;
237*67e74705SXin Li
238*67e74705SXin Li bool HavePartialTok = false;
239*67e74705SXin Li Token PartialTok;
240*67e74705SXin Li if (Pos.BufferPtr != Pos.BufferStart) {
241*67e74705SXin Li formTokenWithChars(PartialTok, getSourceLocation(),
242*67e74705SXin Li Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
243*67e74705SXin Li StringRef(Pos.BufferPtr,
244*67e74705SXin Li Pos.BufferEnd - Pos.BufferPtr));
245*67e74705SXin Li HavePartialTok = true;
246*67e74705SXin Li Pos.CurToken++;
247*67e74705SXin Li }
248*67e74705SXin Li
249*67e74705SXin Li P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
250*67e74705SXin Li Pos.CurToken = Toks.size();
251*67e74705SXin Li
252*67e74705SXin Li if (HavePartialTok)
253*67e74705SXin Li P.putBack(PartialTok);
254*67e74705SXin Li }
255*67e74705SXin Li };
256*67e74705SXin Li
Parser(Lexer & L,Sema & S,llvm::BumpPtrAllocator & Allocator,const SourceManager & SourceMgr,DiagnosticsEngine & Diags,const CommandTraits & Traits)257*67e74705SXin Li Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
258*67e74705SXin Li const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
259*67e74705SXin Li const CommandTraits &Traits):
260*67e74705SXin Li L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
261*67e74705SXin Li Traits(Traits) {
262*67e74705SXin Li consumeToken();
263*67e74705SXin Li }
264*67e74705SXin Li
parseParamCommandArgs(ParamCommandComment * PC,TextTokenRetokenizer & Retokenizer)265*67e74705SXin Li void Parser::parseParamCommandArgs(ParamCommandComment *PC,
266*67e74705SXin Li TextTokenRetokenizer &Retokenizer) {
267*67e74705SXin Li Token Arg;
268*67e74705SXin Li // Check if argument looks like direction specification: [dir]
269*67e74705SXin Li // e.g., [in], [out], [in,out]
270*67e74705SXin Li if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
271*67e74705SXin Li S.actOnParamCommandDirectionArg(PC,
272*67e74705SXin Li Arg.getLocation(),
273*67e74705SXin Li Arg.getEndLocation(),
274*67e74705SXin Li Arg.getText());
275*67e74705SXin Li
276*67e74705SXin Li if (Retokenizer.lexWord(Arg))
277*67e74705SXin Li S.actOnParamCommandParamNameArg(PC,
278*67e74705SXin Li Arg.getLocation(),
279*67e74705SXin Li Arg.getEndLocation(),
280*67e74705SXin Li Arg.getText());
281*67e74705SXin Li }
282*67e74705SXin Li
parseTParamCommandArgs(TParamCommandComment * TPC,TextTokenRetokenizer & Retokenizer)283*67e74705SXin Li void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
284*67e74705SXin Li TextTokenRetokenizer &Retokenizer) {
285*67e74705SXin Li Token Arg;
286*67e74705SXin Li if (Retokenizer.lexWord(Arg))
287*67e74705SXin Li S.actOnTParamCommandParamNameArg(TPC,
288*67e74705SXin Li Arg.getLocation(),
289*67e74705SXin Li Arg.getEndLocation(),
290*67e74705SXin Li Arg.getText());
291*67e74705SXin Li }
292*67e74705SXin Li
parseBlockCommandArgs(BlockCommandComment * BC,TextTokenRetokenizer & Retokenizer,unsigned NumArgs)293*67e74705SXin Li void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
294*67e74705SXin Li TextTokenRetokenizer &Retokenizer,
295*67e74705SXin Li unsigned NumArgs) {
296*67e74705SXin Li typedef BlockCommandComment::Argument Argument;
297*67e74705SXin Li Argument *Args =
298*67e74705SXin Li new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
299*67e74705SXin Li unsigned ParsedArgs = 0;
300*67e74705SXin Li Token Arg;
301*67e74705SXin Li while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
302*67e74705SXin Li Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
303*67e74705SXin Li Arg.getEndLocation()),
304*67e74705SXin Li Arg.getText());
305*67e74705SXin Li ParsedArgs++;
306*67e74705SXin Li }
307*67e74705SXin Li
308*67e74705SXin Li S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
309*67e74705SXin Li }
310*67e74705SXin Li
parseBlockCommand()311*67e74705SXin Li BlockCommandComment *Parser::parseBlockCommand() {
312*67e74705SXin Li assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
313*67e74705SXin Li
314*67e74705SXin Li ParamCommandComment *PC = nullptr;
315*67e74705SXin Li TParamCommandComment *TPC = nullptr;
316*67e74705SXin Li BlockCommandComment *BC = nullptr;
317*67e74705SXin Li const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
318*67e74705SXin Li CommandMarkerKind CommandMarker =
319*67e74705SXin Li Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
320*67e74705SXin Li if (Info->IsParamCommand) {
321*67e74705SXin Li PC = S.actOnParamCommandStart(Tok.getLocation(),
322*67e74705SXin Li Tok.getEndLocation(),
323*67e74705SXin Li Tok.getCommandID(),
324*67e74705SXin Li CommandMarker);
325*67e74705SXin Li } else if (Info->IsTParamCommand) {
326*67e74705SXin Li TPC = S.actOnTParamCommandStart(Tok.getLocation(),
327*67e74705SXin Li Tok.getEndLocation(),
328*67e74705SXin Li Tok.getCommandID(),
329*67e74705SXin Li CommandMarker);
330*67e74705SXin Li } else {
331*67e74705SXin Li BC = S.actOnBlockCommandStart(Tok.getLocation(),
332*67e74705SXin Li Tok.getEndLocation(),
333*67e74705SXin Li Tok.getCommandID(),
334*67e74705SXin Li CommandMarker);
335*67e74705SXin Li }
336*67e74705SXin Li consumeToken();
337*67e74705SXin Li
338*67e74705SXin Li if (isTokBlockCommand()) {
339*67e74705SXin Li // Block command ahead. We can't nest block commands, so pretend that this
340*67e74705SXin Li // command has an empty argument.
341*67e74705SXin Li ParagraphComment *Paragraph = S.actOnParagraphComment(None);
342*67e74705SXin Li if (PC) {
343*67e74705SXin Li S.actOnParamCommandFinish(PC, Paragraph);
344*67e74705SXin Li return PC;
345*67e74705SXin Li } else if (TPC) {
346*67e74705SXin Li S.actOnTParamCommandFinish(TPC, Paragraph);
347*67e74705SXin Li return TPC;
348*67e74705SXin Li } else {
349*67e74705SXin Li S.actOnBlockCommandFinish(BC, Paragraph);
350*67e74705SXin Li return BC;
351*67e74705SXin Li }
352*67e74705SXin Li }
353*67e74705SXin Li
354*67e74705SXin Li if (PC || TPC || Info->NumArgs > 0) {
355*67e74705SXin Li // In order to parse command arguments we need to retokenize a few
356*67e74705SXin Li // following text tokens.
357*67e74705SXin Li TextTokenRetokenizer Retokenizer(Allocator, *this);
358*67e74705SXin Li
359*67e74705SXin Li if (PC)
360*67e74705SXin Li parseParamCommandArgs(PC, Retokenizer);
361*67e74705SXin Li else if (TPC)
362*67e74705SXin Li parseTParamCommandArgs(TPC, Retokenizer);
363*67e74705SXin Li else
364*67e74705SXin Li parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
365*67e74705SXin Li
366*67e74705SXin Li Retokenizer.putBackLeftoverTokens();
367*67e74705SXin Li }
368*67e74705SXin Li
369*67e74705SXin Li // If there's a block command ahead, we will attach an empty paragraph to
370*67e74705SXin Li // this command.
371*67e74705SXin Li bool EmptyParagraph = false;
372*67e74705SXin Li if (isTokBlockCommand())
373*67e74705SXin Li EmptyParagraph = true;
374*67e74705SXin Li else if (Tok.is(tok::newline)) {
375*67e74705SXin Li Token PrevTok = Tok;
376*67e74705SXin Li consumeToken();
377*67e74705SXin Li EmptyParagraph = isTokBlockCommand();
378*67e74705SXin Li putBack(PrevTok);
379*67e74705SXin Li }
380*67e74705SXin Li
381*67e74705SXin Li ParagraphComment *Paragraph;
382*67e74705SXin Li if (EmptyParagraph)
383*67e74705SXin Li Paragraph = S.actOnParagraphComment(None);
384*67e74705SXin Li else {
385*67e74705SXin Li BlockContentComment *Block = parseParagraphOrBlockCommand();
386*67e74705SXin Li // Since we have checked for a block command, we should have parsed a
387*67e74705SXin Li // paragraph.
388*67e74705SXin Li Paragraph = cast<ParagraphComment>(Block);
389*67e74705SXin Li }
390*67e74705SXin Li
391*67e74705SXin Li if (PC) {
392*67e74705SXin Li S.actOnParamCommandFinish(PC, Paragraph);
393*67e74705SXin Li return PC;
394*67e74705SXin Li } else if (TPC) {
395*67e74705SXin Li S.actOnTParamCommandFinish(TPC, Paragraph);
396*67e74705SXin Li return TPC;
397*67e74705SXin Li } else {
398*67e74705SXin Li S.actOnBlockCommandFinish(BC, Paragraph);
399*67e74705SXin Li return BC;
400*67e74705SXin Li }
401*67e74705SXin Li }
402*67e74705SXin Li
parseInlineCommand()403*67e74705SXin Li InlineCommandComment *Parser::parseInlineCommand() {
404*67e74705SXin Li assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
405*67e74705SXin Li
406*67e74705SXin Li const Token CommandTok = Tok;
407*67e74705SXin Li consumeToken();
408*67e74705SXin Li
409*67e74705SXin Li TextTokenRetokenizer Retokenizer(Allocator, *this);
410*67e74705SXin Li
411*67e74705SXin Li Token ArgTok;
412*67e74705SXin Li bool ArgTokValid = Retokenizer.lexWord(ArgTok);
413*67e74705SXin Li
414*67e74705SXin Li InlineCommandComment *IC;
415*67e74705SXin Li if (ArgTokValid) {
416*67e74705SXin Li IC = S.actOnInlineCommand(CommandTok.getLocation(),
417*67e74705SXin Li CommandTok.getEndLocation(),
418*67e74705SXin Li CommandTok.getCommandID(),
419*67e74705SXin Li ArgTok.getLocation(),
420*67e74705SXin Li ArgTok.getEndLocation(),
421*67e74705SXin Li ArgTok.getText());
422*67e74705SXin Li } else {
423*67e74705SXin Li IC = S.actOnInlineCommand(CommandTok.getLocation(),
424*67e74705SXin Li CommandTok.getEndLocation(),
425*67e74705SXin Li CommandTok.getCommandID());
426*67e74705SXin Li }
427*67e74705SXin Li
428*67e74705SXin Li Retokenizer.putBackLeftoverTokens();
429*67e74705SXin Li
430*67e74705SXin Li return IC;
431*67e74705SXin Li }
432*67e74705SXin Li
parseHTMLStartTag()433*67e74705SXin Li HTMLStartTagComment *Parser::parseHTMLStartTag() {
434*67e74705SXin Li assert(Tok.is(tok::html_start_tag));
435*67e74705SXin Li HTMLStartTagComment *HST =
436*67e74705SXin Li S.actOnHTMLStartTagStart(Tok.getLocation(),
437*67e74705SXin Li Tok.getHTMLTagStartName());
438*67e74705SXin Li consumeToken();
439*67e74705SXin Li
440*67e74705SXin Li SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
441*67e74705SXin Li while (true) {
442*67e74705SXin Li switch (Tok.getKind()) {
443*67e74705SXin Li case tok::html_ident: {
444*67e74705SXin Li Token Ident = Tok;
445*67e74705SXin Li consumeToken();
446*67e74705SXin Li if (Tok.isNot(tok::html_equals)) {
447*67e74705SXin Li Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
448*67e74705SXin Li Ident.getHTMLIdent()));
449*67e74705SXin Li continue;
450*67e74705SXin Li }
451*67e74705SXin Li Token Equals = Tok;
452*67e74705SXin Li consumeToken();
453*67e74705SXin Li if (Tok.isNot(tok::html_quoted_string)) {
454*67e74705SXin Li Diag(Tok.getLocation(),
455*67e74705SXin Li diag::warn_doc_html_start_tag_expected_quoted_string)
456*67e74705SXin Li << SourceRange(Equals.getLocation());
457*67e74705SXin Li Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
458*67e74705SXin Li Ident.getHTMLIdent()));
459*67e74705SXin Li while (Tok.is(tok::html_equals) ||
460*67e74705SXin Li Tok.is(tok::html_quoted_string))
461*67e74705SXin Li consumeToken();
462*67e74705SXin Li continue;
463*67e74705SXin Li }
464*67e74705SXin Li Attrs.push_back(HTMLStartTagComment::Attribute(
465*67e74705SXin Li Ident.getLocation(),
466*67e74705SXin Li Ident.getHTMLIdent(),
467*67e74705SXin Li Equals.getLocation(),
468*67e74705SXin Li SourceRange(Tok.getLocation(),
469*67e74705SXin Li Tok.getEndLocation()),
470*67e74705SXin Li Tok.getHTMLQuotedString()));
471*67e74705SXin Li consumeToken();
472*67e74705SXin Li continue;
473*67e74705SXin Li }
474*67e74705SXin Li
475*67e74705SXin Li case tok::html_greater:
476*67e74705SXin Li S.actOnHTMLStartTagFinish(HST,
477*67e74705SXin Li S.copyArray(llvm::makeArrayRef(Attrs)),
478*67e74705SXin Li Tok.getLocation(),
479*67e74705SXin Li /* IsSelfClosing = */ false);
480*67e74705SXin Li consumeToken();
481*67e74705SXin Li return HST;
482*67e74705SXin Li
483*67e74705SXin Li case tok::html_slash_greater:
484*67e74705SXin Li S.actOnHTMLStartTagFinish(HST,
485*67e74705SXin Li S.copyArray(llvm::makeArrayRef(Attrs)),
486*67e74705SXin Li Tok.getLocation(),
487*67e74705SXin Li /* IsSelfClosing = */ true);
488*67e74705SXin Li consumeToken();
489*67e74705SXin Li return HST;
490*67e74705SXin Li
491*67e74705SXin Li case tok::html_equals:
492*67e74705SXin Li case tok::html_quoted_string:
493*67e74705SXin Li Diag(Tok.getLocation(),
494*67e74705SXin Li diag::warn_doc_html_start_tag_expected_ident_or_greater);
495*67e74705SXin Li while (Tok.is(tok::html_equals) ||
496*67e74705SXin Li Tok.is(tok::html_quoted_string))
497*67e74705SXin Li consumeToken();
498*67e74705SXin Li if (Tok.is(tok::html_ident) ||
499*67e74705SXin Li Tok.is(tok::html_greater) ||
500*67e74705SXin Li Tok.is(tok::html_slash_greater))
501*67e74705SXin Li continue;
502*67e74705SXin Li
503*67e74705SXin Li S.actOnHTMLStartTagFinish(HST,
504*67e74705SXin Li S.copyArray(llvm::makeArrayRef(Attrs)),
505*67e74705SXin Li SourceLocation(),
506*67e74705SXin Li /* IsSelfClosing = */ false);
507*67e74705SXin Li return HST;
508*67e74705SXin Li
509*67e74705SXin Li default:
510*67e74705SXin Li // Not a token from an HTML start tag. Thus HTML tag prematurely ended.
511*67e74705SXin Li S.actOnHTMLStartTagFinish(HST,
512*67e74705SXin Li S.copyArray(llvm::makeArrayRef(Attrs)),
513*67e74705SXin Li SourceLocation(),
514*67e74705SXin Li /* IsSelfClosing = */ false);
515*67e74705SXin Li bool StartLineInvalid;
516*67e74705SXin Li const unsigned StartLine = SourceMgr.getPresumedLineNumber(
517*67e74705SXin Li HST->getLocation(),
518*67e74705SXin Li &StartLineInvalid);
519*67e74705SXin Li bool EndLineInvalid;
520*67e74705SXin Li const unsigned EndLine = SourceMgr.getPresumedLineNumber(
521*67e74705SXin Li Tok.getLocation(),
522*67e74705SXin Li &EndLineInvalid);
523*67e74705SXin Li if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
524*67e74705SXin Li Diag(Tok.getLocation(),
525*67e74705SXin Li diag::warn_doc_html_start_tag_expected_ident_or_greater)
526*67e74705SXin Li << HST->getSourceRange();
527*67e74705SXin Li else {
528*67e74705SXin Li Diag(Tok.getLocation(),
529*67e74705SXin Li diag::warn_doc_html_start_tag_expected_ident_or_greater);
530*67e74705SXin Li Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
531*67e74705SXin Li << HST->getSourceRange();
532*67e74705SXin Li }
533*67e74705SXin Li return HST;
534*67e74705SXin Li }
535*67e74705SXin Li }
536*67e74705SXin Li }
537*67e74705SXin Li
parseHTMLEndTag()538*67e74705SXin Li HTMLEndTagComment *Parser::parseHTMLEndTag() {
539*67e74705SXin Li assert(Tok.is(tok::html_end_tag));
540*67e74705SXin Li Token TokEndTag = Tok;
541*67e74705SXin Li consumeToken();
542*67e74705SXin Li SourceLocation Loc;
543*67e74705SXin Li if (Tok.is(tok::html_greater)) {
544*67e74705SXin Li Loc = Tok.getLocation();
545*67e74705SXin Li consumeToken();
546*67e74705SXin Li }
547*67e74705SXin Li
548*67e74705SXin Li return S.actOnHTMLEndTag(TokEndTag.getLocation(),
549*67e74705SXin Li Loc,
550*67e74705SXin Li TokEndTag.getHTMLTagEndName());
551*67e74705SXin Li }
552*67e74705SXin Li
parseParagraphOrBlockCommand()553*67e74705SXin Li BlockContentComment *Parser::parseParagraphOrBlockCommand() {
554*67e74705SXin Li SmallVector<InlineContentComment *, 8> Content;
555*67e74705SXin Li
556*67e74705SXin Li while (true) {
557*67e74705SXin Li switch (Tok.getKind()) {
558*67e74705SXin Li case tok::verbatim_block_begin:
559*67e74705SXin Li case tok::verbatim_line_name:
560*67e74705SXin Li case tok::eof:
561*67e74705SXin Li assert(Content.size() != 0);
562*67e74705SXin Li break; // Block content or EOF ahead, finish this parapgaph.
563*67e74705SXin Li
564*67e74705SXin Li case tok::unknown_command:
565*67e74705SXin Li Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
566*67e74705SXin Li Tok.getEndLocation(),
567*67e74705SXin Li Tok.getUnknownCommandName()));
568*67e74705SXin Li consumeToken();
569*67e74705SXin Li continue;
570*67e74705SXin Li
571*67e74705SXin Li case tok::backslash_command:
572*67e74705SXin Li case tok::at_command: {
573*67e74705SXin Li const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
574*67e74705SXin Li if (Info->IsBlockCommand) {
575*67e74705SXin Li if (Content.size() == 0)
576*67e74705SXin Li return parseBlockCommand();
577*67e74705SXin Li break; // Block command ahead, finish this parapgaph.
578*67e74705SXin Li }
579*67e74705SXin Li if (Info->IsVerbatimBlockEndCommand) {
580*67e74705SXin Li Diag(Tok.getLocation(),
581*67e74705SXin Li diag::warn_verbatim_block_end_without_start)
582*67e74705SXin Li << Tok.is(tok::at_command)
583*67e74705SXin Li << Info->Name
584*67e74705SXin Li << SourceRange(Tok.getLocation(), Tok.getEndLocation());
585*67e74705SXin Li consumeToken();
586*67e74705SXin Li continue;
587*67e74705SXin Li }
588*67e74705SXin Li if (Info->IsUnknownCommand) {
589*67e74705SXin Li Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
590*67e74705SXin Li Tok.getEndLocation(),
591*67e74705SXin Li Info->getID()));
592*67e74705SXin Li consumeToken();
593*67e74705SXin Li continue;
594*67e74705SXin Li }
595*67e74705SXin Li assert(Info->IsInlineCommand);
596*67e74705SXin Li Content.push_back(parseInlineCommand());
597*67e74705SXin Li continue;
598*67e74705SXin Li }
599*67e74705SXin Li
600*67e74705SXin Li case tok::newline: {
601*67e74705SXin Li consumeToken();
602*67e74705SXin Li if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
603*67e74705SXin Li consumeToken();
604*67e74705SXin Li break; // Two newlines -- end of paragraph.
605*67e74705SXin Li }
606*67e74705SXin Li // Also allow [tok::newline, tok::text, tok::newline] if the middle
607*67e74705SXin Li // tok::text is just whitespace.
608*67e74705SXin Li if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
609*67e74705SXin Li Token WhitespaceTok = Tok;
610*67e74705SXin Li consumeToken();
611*67e74705SXin Li if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
612*67e74705SXin Li consumeToken();
613*67e74705SXin Li break;
614*67e74705SXin Li }
615*67e74705SXin Li // We have [tok::newline, tok::text, non-newline]. Put back tok::text.
616*67e74705SXin Li putBack(WhitespaceTok);
617*67e74705SXin Li }
618*67e74705SXin Li if (Content.size() > 0)
619*67e74705SXin Li Content.back()->addTrailingNewline();
620*67e74705SXin Li continue;
621*67e74705SXin Li }
622*67e74705SXin Li
623*67e74705SXin Li // Don't deal with HTML tag soup now.
624*67e74705SXin Li case tok::html_start_tag:
625*67e74705SXin Li Content.push_back(parseHTMLStartTag());
626*67e74705SXin Li continue;
627*67e74705SXin Li
628*67e74705SXin Li case tok::html_end_tag:
629*67e74705SXin Li Content.push_back(parseHTMLEndTag());
630*67e74705SXin Li continue;
631*67e74705SXin Li
632*67e74705SXin Li case tok::text:
633*67e74705SXin Li Content.push_back(S.actOnText(Tok.getLocation(),
634*67e74705SXin Li Tok.getEndLocation(),
635*67e74705SXin Li Tok.getText()));
636*67e74705SXin Li consumeToken();
637*67e74705SXin Li continue;
638*67e74705SXin Li
639*67e74705SXin Li case tok::verbatim_block_line:
640*67e74705SXin Li case tok::verbatim_block_end:
641*67e74705SXin Li case tok::verbatim_line_text:
642*67e74705SXin Li case tok::html_ident:
643*67e74705SXin Li case tok::html_equals:
644*67e74705SXin Li case tok::html_quoted_string:
645*67e74705SXin Li case tok::html_greater:
646*67e74705SXin Li case tok::html_slash_greater:
647*67e74705SXin Li llvm_unreachable("should not see this token");
648*67e74705SXin Li }
649*67e74705SXin Li break;
650*67e74705SXin Li }
651*67e74705SXin Li
652*67e74705SXin Li return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
653*67e74705SXin Li }
654*67e74705SXin Li
parseVerbatimBlock()655*67e74705SXin Li VerbatimBlockComment *Parser::parseVerbatimBlock() {
656*67e74705SXin Li assert(Tok.is(tok::verbatim_block_begin));
657*67e74705SXin Li
658*67e74705SXin Li VerbatimBlockComment *VB =
659*67e74705SXin Li S.actOnVerbatimBlockStart(Tok.getLocation(),
660*67e74705SXin Li Tok.getVerbatimBlockID());
661*67e74705SXin Li consumeToken();
662*67e74705SXin Li
663*67e74705SXin Li // Don't create an empty line if verbatim opening command is followed
664*67e74705SXin Li // by a newline.
665*67e74705SXin Li if (Tok.is(tok::newline))
666*67e74705SXin Li consumeToken();
667*67e74705SXin Li
668*67e74705SXin Li SmallVector<VerbatimBlockLineComment *, 8> Lines;
669*67e74705SXin Li while (Tok.is(tok::verbatim_block_line) ||
670*67e74705SXin Li Tok.is(tok::newline)) {
671*67e74705SXin Li VerbatimBlockLineComment *Line;
672*67e74705SXin Li if (Tok.is(tok::verbatim_block_line)) {
673*67e74705SXin Li Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
674*67e74705SXin Li Tok.getVerbatimBlockText());
675*67e74705SXin Li consumeToken();
676*67e74705SXin Li if (Tok.is(tok::newline)) {
677*67e74705SXin Li consumeToken();
678*67e74705SXin Li }
679*67e74705SXin Li } else {
680*67e74705SXin Li // Empty line, just a tok::newline.
681*67e74705SXin Li Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
682*67e74705SXin Li consumeToken();
683*67e74705SXin Li }
684*67e74705SXin Li Lines.push_back(Line);
685*67e74705SXin Li }
686*67e74705SXin Li
687*67e74705SXin Li if (Tok.is(tok::verbatim_block_end)) {
688*67e74705SXin Li const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
689*67e74705SXin Li S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
690*67e74705SXin Li Info->Name,
691*67e74705SXin Li S.copyArray(llvm::makeArrayRef(Lines)));
692*67e74705SXin Li consumeToken();
693*67e74705SXin Li } else {
694*67e74705SXin Li // Unterminated \\verbatim block
695*67e74705SXin Li S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
696*67e74705SXin Li S.copyArray(llvm::makeArrayRef(Lines)));
697*67e74705SXin Li }
698*67e74705SXin Li
699*67e74705SXin Li return VB;
700*67e74705SXin Li }
701*67e74705SXin Li
parseVerbatimLine()702*67e74705SXin Li VerbatimLineComment *Parser::parseVerbatimLine() {
703*67e74705SXin Li assert(Tok.is(tok::verbatim_line_name));
704*67e74705SXin Li
705*67e74705SXin Li Token NameTok = Tok;
706*67e74705SXin Li consumeToken();
707*67e74705SXin Li
708*67e74705SXin Li SourceLocation TextBegin;
709*67e74705SXin Li StringRef Text;
710*67e74705SXin Li // Next token might not be a tok::verbatim_line_text if verbatim line
711*67e74705SXin Li // starting command comes just before a newline or comment end.
712*67e74705SXin Li if (Tok.is(tok::verbatim_line_text)) {
713*67e74705SXin Li TextBegin = Tok.getLocation();
714*67e74705SXin Li Text = Tok.getVerbatimLineText();
715*67e74705SXin Li } else {
716*67e74705SXin Li TextBegin = NameTok.getEndLocation();
717*67e74705SXin Li Text = "";
718*67e74705SXin Li }
719*67e74705SXin Li
720*67e74705SXin Li VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
721*67e74705SXin Li NameTok.getVerbatimLineID(),
722*67e74705SXin Li TextBegin,
723*67e74705SXin Li Text);
724*67e74705SXin Li consumeToken();
725*67e74705SXin Li return VL;
726*67e74705SXin Li }
727*67e74705SXin Li
parseBlockContent()728*67e74705SXin Li BlockContentComment *Parser::parseBlockContent() {
729*67e74705SXin Li switch (Tok.getKind()) {
730*67e74705SXin Li case tok::text:
731*67e74705SXin Li case tok::unknown_command:
732*67e74705SXin Li case tok::backslash_command:
733*67e74705SXin Li case tok::at_command:
734*67e74705SXin Li case tok::html_start_tag:
735*67e74705SXin Li case tok::html_end_tag:
736*67e74705SXin Li return parseParagraphOrBlockCommand();
737*67e74705SXin Li
738*67e74705SXin Li case tok::verbatim_block_begin:
739*67e74705SXin Li return parseVerbatimBlock();
740*67e74705SXin Li
741*67e74705SXin Li case tok::verbatim_line_name:
742*67e74705SXin Li return parseVerbatimLine();
743*67e74705SXin Li
744*67e74705SXin Li case tok::eof:
745*67e74705SXin Li case tok::newline:
746*67e74705SXin Li case tok::verbatim_block_line:
747*67e74705SXin Li case tok::verbatim_block_end:
748*67e74705SXin Li case tok::verbatim_line_text:
749*67e74705SXin Li case tok::html_ident:
750*67e74705SXin Li case tok::html_equals:
751*67e74705SXin Li case tok::html_quoted_string:
752*67e74705SXin Li case tok::html_greater:
753*67e74705SXin Li case tok::html_slash_greater:
754*67e74705SXin Li llvm_unreachable("should not see this token");
755*67e74705SXin Li }
756*67e74705SXin Li llvm_unreachable("bogus token kind");
757*67e74705SXin Li }
758*67e74705SXin Li
parseFullComment()759*67e74705SXin Li FullComment *Parser::parseFullComment() {
760*67e74705SXin Li // Skip newlines at the beginning of the comment.
761*67e74705SXin Li while (Tok.is(tok::newline))
762*67e74705SXin Li consumeToken();
763*67e74705SXin Li
764*67e74705SXin Li SmallVector<BlockContentComment *, 8> Blocks;
765*67e74705SXin Li while (Tok.isNot(tok::eof)) {
766*67e74705SXin Li Blocks.push_back(parseBlockContent());
767*67e74705SXin Li
768*67e74705SXin Li // Skip extra newlines after paragraph end.
769*67e74705SXin Li while (Tok.is(tok::newline))
770*67e74705SXin Li consumeToken();
771*67e74705SXin Li }
772*67e74705SXin Li return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
773*67e74705SXin Li }
774*67e74705SXin Li
775*67e74705SXin Li } // end namespace comments
776*67e74705SXin Li } // end namespace clang
777