xref: /aosp_15_r20/external/clang/lib/AST/CommentParser.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //===--- CommentParser.cpp - Doxygen comment parser -----------------------===//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li 
10*67e74705SXin Li #include "clang/AST/CommentParser.h"
11*67e74705SXin Li #include "clang/AST/CommentCommandTraits.h"
12*67e74705SXin Li #include "clang/AST/CommentDiagnostic.h"
13*67e74705SXin Li #include "clang/AST/CommentSema.h"
14*67e74705SXin Li #include "clang/Basic/CharInfo.h"
15*67e74705SXin Li #include "clang/Basic/SourceManager.h"
16*67e74705SXin Li #include "llvm/Support/ErrorHandling.h"
17*67e74705SXin Li 
18*67e74705SXin Li namespace clang {
19*67e74705SXin Li 
isWhitespace(llvm::StringRef S)20*67e74705SXin Li static inline bool isWhitespace(llvm::StringRef S) {
21*67e74705SXin Li   for (StringRef::const_iterator I = S.begin(), E = S.end(); I != E; ++I) {
22*67e74705SXin Li     if (!isWhitespace(*I))
23*67e74705SXin Li       return false;
24*67e74705SXin Li   }
25*67e74705SXin Li   return true;
26*67e74705SXin Li }
27*67e74705SXin Li 
28*67e74705SXin Li namespace comments {
29*67e74705SXin Li 
30*67e74705SXin Li /// Re-lexes a sequence of tok::text tokens.
31*67e74705SXin Li class TextTokenRetokenizer {
32*67e74705SXin Li   llvm::BumpPtrAllocator &Allocator;
33*67e74705SXin Li   Parser &P;
34*67e74705SXin Li 
35*67e74705SXin Li   /// This flag is set when there are no more tokens we can fetch from lexer.
36*67e74705SXin Li   bool NoMoreInterestingTokens;
37*67e74705SXin Li 
38*67e74705SXin Li   /// Token buffer: tokens we have processed and lookahead.
39*67e74705SXin Li   SmallVector<Token, 16> Toks;
40*67e74705SXin Li 
41*67e74705SXin Li   /// A position in \c Toks.
42*67e74705SXin Li   struct Position {
43*67e74705SXin Li     unsigned CurToken;
44*67e74705SXin Li     const char *BufferStart;
45*67e74705SXin Li     const char *BufferEnd;
46*67e74705SXin Li     const char *BufferPtr;
47*67e74705SXin Li     SourceLocation BufferStartLoc;
48*67e74705SXin Li   };
49*67e74705SXin Li 
50*67e74705SXin Li   /// Current position in Toks.
51*67e74705SXin Li   Position Pos;
52*67e74705SXin Li 
isEnd() const53*67e74705SXin Li   bool isEnd() const {
54*67e74705SXin Li     return Pos.CurToken >= Toks.size();
55*67e74705SXin Li   }
56*67e74705SXin Li 
57*67e74705SXin Li   /// Sets up the buffer pointers to point to current token.
setupBuffer()58*67e74705SXin Li   void setupBuffer() {
59*67e74705SXin Li     assert(!isEnd());
60*67e74705SXin Li     const Token &Tok = Toks[Pos.CurToken];
61*67e74705SXin Li 
62*67e74705SXin Li     Pos.BufferStart = Tok.getText().begin();
63*67e74705SXin Li     Pos.BufferEnd = Tok.getText().end();
64*67e74705SXin Li     Pos.BufferPtr = Pos.BufferStart;
65*67e74705SXin Li     Pos.BufferStartLoc = Tok.getLocation();
66*67e74705SXin Li   }
67*67e74705SXin Li 
getSourceLocation() const68*67e74705SXin Li   SourceLocation getSourceLocation() const {
69*67e74705SXin Li     const unsigned CharNo = Pos.BufferPtr - Pos.BufferStart;
70*67e74705SXin Li     return Pos.BufferStartLoc.getLocWithOffset(CharNo);
71*67e74705SXin Li   }
72*67e74705SXin Li 
peek() const73*67e74705SXin Li   char peek() const {
74*67e74705SXin Li     assert(!isEnd());
75*67e74705SXin Li     assert(Pos.BufferPtr != Pos.BufferEnd);
76*67e74705SXin Li     return *Pos.BufferPtr;
77*67e74705SXin Li   }
78*67e74705SXin Li 
consumeChar()79*67e74705SXin Li   void consumeChar() {
80*67e74705SXin Li     assert(!isEnd());
81*67e74705SXin Li     assert(Pos.BufferPtr != Pos.BufferEnd);
82*67e74705SXin Li     Pos.BufferPtr++;
83*67e74705SXin Li     if (Pos.BufferPtr == Pos.BufferEnd) {
84*67e74705SXin Li       Pos.CurToken++;
85*67e74705SXin Li       if (isEnd() && !addToken())
86*67e74705SXin Li         return;
87*67e74705SXin Li 
88*67e74705SXin Li       assert(!isEnd());
89*67e74705SXin Li       setupBuffer();
90*67e74705SXin Li     }
91*67e74705SXin Li   }
92*67e74705SXin Li 
93*67e74705SXin Li   /// Add a token.
94*67e74705SXin Li   /// Returns true on success, false if there are no interesting tokens to
95*67e74705SXin Li   /// fetch from lexer.
addToken()96*67e74705SXin Li   bool addToken() {
97*67e74705SXin Li     if (NoMoreInterestingTokens)
98*67e74705SXin Li       return false;
99*67e74705SXin Li 
100*67e74705SXin Li     if (P.Tok.is(tok::newline)) {
101*67e74705SXin Li       // If we see a single newline token between text tokens, skip it.
102*67e74705SXin Li       Token Newline = P.Tok;
103*67e74705SXin Li       P.consumeToken();
104*67e74705SXin Li       if (P.Tok.isNot(tok::text)) {
105*67e74705SXin Li         P.putBack(Newline);
106*67e74705SXin Li         NoMoreInterestingTokens = true;
107*67e74705SXin Li         return false;
108*67e74705SXin Li       }
109*67e74705SXin Li     }
110*67e74705SXin Li     if (P.Tok.isNot(tok::text)) {
111*67e74705SXin Li       NoMoreInterestingTokens = true;
112*67e74705SXin Li       return false;
113*67e74705SXin Li     }
114*67e74705SXin Li 
115*67e74705SXin Li     Toks.push_back(P.Tok);
116*67e74705SXin Li     P.consumeToken();
117*67e74705SXin Li     if (Toks.size() == 1)
118*67e74705SXin Li       setupBuffer();
119*67e74705SXin Li     return true;
120*67e74705SXin Li   }
121*67e74705SXin Li 
consumeWhitespace()122*67e74705SXin Li   void consumeWhitespace() {
123*67e74705SXin Li     while (!isEnd()) {
124*67e74705SXin Li       if (isWhitespace(peek()))
125*67e74705SXin Li         consumeChar();
126*67e74705SXin Li       else
127*67e74705SXin Li         break;
128*67e74705SXin Li     }
129*67e74705SXin Li   }
130*67e74705SXin Li 
formTokenWithChars(Token & Result,SourceLocation Loc,const char * TokBegin,unsigned TokLength,StringRef Text)131*67e74705SXin Li   void formTokenWithChars(Token &Result,
132*67e74705SXin Li                           SourceLocation Loc,
133*67e74705SXin Li                           const char *TokBegin,
134*67e74705SXin Li                           unsigned TokLength,
135*67e74705SXin Li                           StringRef Text) {
136*67e74705SXin Li     Result.setLocation(Loc);
137*67e74705SXin Li     Result.setKind(tok::text);
138*67e74705SXin Li     Result.setLength(TokLength);
139*67e74705SXin Li #ifndef NDEBUG
140*67e74705SXin Li     Result.TextPtr = "<UNSET>";
141*67e74705SXin Li     Result.IntVal = 7;
142*67e74705SXin Li #endif
143*67e74705SXin Li     Result.setText(Text);
144*67e74705SXin Li   }
145*67e74705SXin Li 
146*67e74705SXin Li public:
TextTokenRetokenizer(llvm::BumpPtrAllocator & Allocator,Parser & P)147*67e74705SXin Li   TextTokenRetokenizer(llvm::BumpPtrAllocator &Allocator, Parser &P):
148*67e74705SXin Li       Allocator(Allocator), P(P), NoMoreInterestingTokens(false) {
149*67e74705SXin Li     Pos.CurToken = 0;
150*67e74705SXin Li     addToken();
151*67e74705SXin Li   }
152*67e74705SXin Li 
153*67e74705SXin Li   /// Extract a word -- sequence of non-whitespace characters.
lexWord(Token & Tok)154*67e74705SXin Li   bool lexWord(Token &Tok) {
155*67e74705SXin Li     if (isEnd())
156*67e74705SXin Li       return false;
157*67e74705SXin Li 
158*67e74705SXin Li     Position SavedPos = Pos;
159*67e74705SXin Li 
160*67e74705SXin Li     consumeWhitespace();
161*67e74705SXin Li     SmallString<32> WordText;
162*67e74705SXin Li     const char *WordBegin = Pos.BufferPtr;
163*67e74705SXin Li     SourceLocation Loc = getSourceLocation();
164*67e74705SXin Li     while (!isEnd()) {
165*67e74705SXin Li       const char C = peek();
166*67e74705SXin Li       if (!isWhitespace(C)) {
167*67e74705SXin Li         WordText.push_back(C);
168*67e74705SXin Li         consumeChar();
169*67e74705SXin Li       } else
170*67e74705SXin Li         break;
171*67e74705SXin Li     }
172*67e74705SXin Li     const unsigned Length = WordText.size();
173*67e74705SXin Li     if (Length == 0) {
174*67e74705SXin Li       Pos = SavedPos;
175*67e74705SXin Li       return false;
176*67e74705SXin Li     }
177*67e74705SXin Li 
178*67e74705SXin Li     char *TextPtr = Allocator.Allocate<char>(Length + 1);
179*67e74705SXin Li 
180*67e74705SXin Li     memcpy(TextPtr, WordText.c_str(), Length + 1);
181*67e74705SXin Li     StringRef Text = StringRef(TextPtr, Length);
182*67e74705SXin Li 
183*67e74705SXin Li     formTokenWithChars(Tok, Loc, WordBegin, Length, Text);
184*67e74705SXin Li     return true;
185*67e74705SXin Li   }
186*67e74705SXin Li 
lexDelimitedSeq(Token & Tok,char OpenDelim,char CloseDelim)187*67e74705SXin Li   bool lexDelimitedSeq(Token &Tok, char OpenDelim, char CloseDelim) {
188*67e74705SXin Li     if (isEnd())
189*67e74705SXin Li       return false;
190*67e74705SXin Li 
191*67e74705SXin Li     Position SavedPos = Pos;
192*67e74705SXin Li 
193*67e74705SXin Li     consumeWhitespace();
194*67e74705SXin Li     SmallString<32> WordText;
195*67e74705SXin Li     const char *WordBegin = Pos.BufferPtr;
196*67e74705SXin Li     SourceLocation Loc = getSourceLocation();
197*67e74705SXin Li     bool Error = false;
198*67e74705SXin Li     if (!isEnd()) {
199*67e74705SXin Li       const char C = peek();
200*67e74705SXin Li       if (C == OpenDelim) {
201*67e74705SXin Li         WordText.push_back(C);
202*67e74705SXin Li         consumeChar();
203*67e74705SXin Li       } else
204*67e74705SXin Li         Error = true;
205*67e74705SXin Li     }
206*67e74705SXin Li     char C = '\0';
207*67e74705SXin Li     while (!Error && !isEnd()) {
208*67e74705SXin Li       C = peek();
209*67e74705SXin Li       WordText.push_back(C);
210*67e74705SXin Li       consumeChar();
211*67e74705SXin Li       if (C == CloseDelim)
212*67e74705SXin Li         break;
213*67e74705SXin Li     }
214*67e74705SXin Li     if (!Error && C != CloseDelim)
215*67e74705SXin Li       Error = true;
216*67e74705SXin Li 
217*67e74705SXin Li     if (Error) {
218*67e74705SXin Li       Pos = SavedPos;
219*67e74705SXin Li       return false;
220*67e74705SXin Li     }
221*67e74705SXin Li 
222*67e74705SXin Li     const unsigned Length = WordText.size();
223*67e74705SXin Li     char *TextPtr = Allocator.Allocate<char>(Length + 1);
224*67e74705SXin Li 
225*67e74705SXin Li     memcpy(TextPtr, WordText.c_str(), Length + 1);
226*67e74705SXin Li     StringRef Text = StringRef(TextPtr, Length);
227*67e74705SXin Li 
228*67e74705SXin Li     formTokenWithChars(Tok, Loc, WordBegin,
229*67e74705SXin Li                        Pos.BufferPtr - WordBegin, Text);
230*67e74705SXin Li     return true;
231*67e74705SXin Li   }
232*67e74705SXin Li 
233*67e74705SXin Li   /// Put back tokens that we didn't consume.
putBackLeftoverTokens()234*67e74705SXin Li   void putBackLeftoverTokens() {
235*67e74705SXin Li     if (isEnd())
236*67e74705SXin Li       return;
237*67e74705SXin Li 
238*67e74705SXin Li     bool HavePartialTok = false;
239*67e74705SXin Li     Token PartialTok;
240*67e74705SXin Li     if (Pos.BufferPtr != Pos.BufferStart) {
241*67e74705SXin Li       formTokenWithChars(PartialTok, getSourceLocation(),
242*67e74705SXin Li                          Pos.BufferPtr, Pos.BufferEnd - Pos.BufferPtr,
243*67e74705SXin Li                          StringRef(Pos.BufferPtr,
244*67e74705SXin Li                                    Pos.BufferEnd - Pos.BufferPtr));
245*67e74705SXin Li       HavePartialTok = true;
246*67e74705SXin Li       Pos.CurToken++;
247*67e74705SXin Li     }
248*67e74705SXin Li 
249*67e74705SXin Li     P.putBack(llvm::makeArrayRef(Toks.begin() + Pos.CurToken, Toks.end()));
250*67e74705SXin Li     Pos.CurToken = Toks.size();
251*67e74705SXin Li 
252*67e74705SXin Li     if (HavePartialTok)
253*67e74705SXin Li       P.putBack(PartialTok);
254*67e74705SXin Li   }
255*67e74705SXin Li };
256*67e74705SXin Li 
Parser(Lexer & L,Sema & S,llvm::BumpPtrAllocator & Allocator,const SourceManager & SourceMgr,DiagnosticsEngine & Diags,const CommandTraits & Traits)257*67e74705SXin Li Parser::Parser(Lexer &L, Sema &S, llvm::BumpPtrAllocator &Allocator,
258*67e74705SXin Li                const SourceManager &SourceMgr, DiagnosticsEngine &Diags,
259*67e74705SXin Li                const CommandTraits &Traits):
260*67e74705SXin Li     L(L), S(S), Allocator(Allocator), SourceMgr(SourceMgr), Diags(Diags),
261*67e74705SXin Li     Traits(Traits) {
262*67e74705SXin Li   consumeToken();
263*67e74705SXin Li }
264*67e74705SXin Li 
parseParamCommandArgs(ParamCommandComment * PC,TextTokenRetokenizer & Retokenizer)265*67e74705SXin Li void Parser::parseParamCommandArgs(ParamCommandComment *PC,
266*67e74705SXin Li                                    TextTokenRetokenizer &Retokenizer) {
267*67e74705SXin Li   Token Arg;
268*67e74705SXin Li   // Check if argument looks like direction specification: [dir]
269*67e74705SXin Li   // e.g., [in], [out], [in,out]
270*67e74705SXin Li   if (Retokenizer.lexDelimitedSeq(Arg, '[', ']'))
271*67e74705SXin Li     S.actOnParamCommandDirectionArg(PC,
272*67e74705SXin Li                                     Arg.getLocation(),
273*67e74705SXin Li                                     Arg.getEndLocation(),
274*67e74705SXin Li                                     Arg.getText());
275*67e74705SXin Li 
276*67e74705SXin Li   if (Retokenizer.lexWord(Arg))
277*67e74705SXin Li     S.actOnParamCommandParamNameArg(PC,
278*67e74705SXin Li                                     Arg.getLocation(),
279*67e74705SXin Li                                     Arg.getEndLocation(),
280*67e74705SXin Li                                     Arg.getText());
281*67e74705SXin Li }
282*67e74705SXin Li 
parseTParamCommandArgs(TParamCommandComment * TPC,TextTokenRetokenizer & Retokenizer)283*67e74705SXin Li void Parser::parseTParamCommandArgs(TParamCommandComment *TPC,
284*67e74705SXin Li                                     TextTokenRetokenizer &Retokenizer) {
285*67e74705SXin Li   Token Arg;
286*67e74705SXin Li   if (Retokenizer.lexWord(Arg))
287*67e74705SXin Li     S.actOnTParamCommandParamNameArg(TPC,
288*67e74705SXin Li                                      Arg.getLocation(),
289*67e74705SXin Li                                      Arg.getEndLocation(),
290*67e74705SXin Li                                      Arg.getText());
291*67e74705SXin Li }
292*67e74705SXin Li 
parseBlockCommandArgs(BlockCommandComment * BC,TextTokenRetokenizer & Retokenizer,unsigned NumArgs)293*67e74705SXin Li void Parser::parseBlockCommandArgs(BlockCommandComment *BC,
294*67e74705SXin Li                                    TextTokenRetokenizer &Retokenizer,
295*67e74705SXin Li                                    unsigned NumArgs) {
296*67e74705SXin Li   typedef BlockCommandComment::Argument Argument;
297*67e74705SXin Li   Argument *Args =
298*67e74705SXin Li       new (Allocator.Allocate<Argument>(NumArgs)) Argument[NumArgs];
299*67e74705SXin Li   unsigned ParsedArgs = 0;
300*67e74705SXin Li   Token Arg;
301*67e74705SXin Li   while (ParsedArgs < NumArgs && Retokenizer.lexWord(Arg)) {
302*67e74705SXin Li     Args[ParsedArgs] = Argument(SourceRange(Arg.getLocation(),
303*67e74705SXin Li                                             Arg.getEndLocation()),
304*67e74705SXin Li                                 Arg.getText());
305*67e74705SXin Li     ParsedArgs++;
306*67e74705SXin Li   }
307*67e74705SXin Li 
308*67e74705SXin Li   S.actOnBlockCommandArgs(BC, llvm::makeArrayRef(Args, ParsedArgs));
309*67e74705SXin Li }
310*67e74705SXin Li 
parseBlockCommand()311*67e74705SXin Li BlockCommandComment *Parser::parseBlockCommand() {
312*67e74705SXin Li   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
313*67e74705SXin Li 
314*67e74705SXin Li   ParamCommandComment *PC = nullptr;
315*67e74705SXin Li   TParamCommandComment *TPC = nullptr;
316*67e74705SXin Li   BlockCommandComment *BC = nullptr;
317*67e74705SXin Li   const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
318*67e74705SXin Li   CommandMarkerKind CommandMarker =
319*67e74705SXin Li       Tok.is(tok::backslash_command) ? CMK_Backslash : CMK_At;
320*67e74705SXin Li   if (Info->IsParamCommand) {
321*67e74705SXin Li     PC = S.actOnParamCommandStart(Tok.getLocation(),
322*67e74705SXin Li                                   Tok.getEndLocation(),
323*67e74705SXin Li                                   Tok.getCommandID(),
324*67e74705SXin Li                                   CommandMarker);
325*67e74705SXin Li   } else if (Info->IsTParamCommand) {
326*67e74705SXin Li     TPC = S.actOnTParamCommandStart(Tok.getLocation(),
327*67e74705SXin Li                                     Tok.getEndLocation(),
328*67e74705SXin Li                                     Tok.getCommandID(),
329*67e74705SXin Li                                     CommandMarker);
330*67e74705SXin Li   } else {
331*67e74705SXin Li     BC = S.actOnBlockCommandStart(Tok.getLocation(),
332*67e74705SXin Li                                   Tok.getEndLocation(),
333*67e74705SXin Li                                   Tok.getCommandID(),
334*67e74705SXin Li                                   CommandMarker);
335*67e74705SXin Li   }
336*67e74705SXin Li   consumeToken();
337*67e74705SXin Li 
338*67e74705SXin Li   if (isTokBlockCommand()) {
339*67e74705SXin Li     // Block command ahead.  We can't nest block commands, so pretend that this
340*67e74705SXin Li     // command has an empty argument.
341*67e74705SXin Li     ParagraphComment *Paragraph = S.actOnParagraphComment(None);
342*67e74705SXin Li     if (PC) {
343*67e74705SXin Li       S.actOnParamCommandFinish(PC, Paragraph);
344*67e74705SXin Li       return PC;
345*67e74705SXin Li     } else if (TPC) {
346*67e74705SXin Li       S.actOnTParamCommandFinish(TPC, Paragraph);
347*67e74705SXin Li       return TPC;
348*67e74705SXin Li     } else {
349*67e74705SXin Li       S.actOnBlockCommandFinish(BC, Paragraph);
350*67e74705SXin Li       return BC;
351*67e74705SXin Li     }
352*67e74705SXin Li   }
353*67e74705SXin Li 
354*67e74705SXin Li   if (PC || TPC || Info->NumArgs > 0) {
355*67e74705SXin Li     // In order to parse command arguments we need to retokenize a few
356*67e74705SXin Li     // following text tokens.
357*67e74705SXin Li     TextTokenRetokenizer Retokenizer(Allocator, *this);
358*67e74705SXin Li 
359*67e74705SXin Li     if (PC)
360*67e74705SXin Li       parseParamCommandArgs(PC, Retokenizer);
361*67e74705SXin Li     else if (TPC)
362*67e74705SXin Li       parseTParamCommandArgs(TPC, Retokenizer);
363*67e74705SXin Li     else
364*67e74705SXin Li       parseBlockCommandArgs(BC, Retokenizer, Info->NumArgs);
365*67e74705SXin Li 
366*67e74705SXin Li     Retokenizer.putBackLeftoverTokens();
367*67e74705SXin Li   }
368*67e74705SXin Li 
369*67e74705SXin Li   // If there's a block command ahead, we will attach an empty paragraph to
370*67e74705SXin Li   // this command.
371*67e74705SXin Li   bool EmptyParagraph = false;
372*67e74705SXin Li   if (isTokBlockCommand())
373*67e74705SXin Li     EmptyParagraph = true;
374*67e74705SXin Li   else if (Tok.is(tok::newline)) {
375*67e74705SXin Li     Token PrevTok = Tok;
376*67e74705SXin Li     consumeToken();
377*67e74705SXin Li     EmptyParagraph = isTokBlockCommand();
378*67e74705SXin Li     putBack(PrevTok);
379*67e74705SXin Li   }
380*67e74705SXin Li 
381*67e74705SXin Li   ParagraphComment *Paragraph;
382*67e74705SXin Li   if (EmptyParagraph)
383*67e74705SXin Li     Paragraph = S.actOnParagraphComment(None);
384*67e74705SXin Li   else {
385*67e74705SXin Li     BlockContentComment *Block = parseParagraphOrBlockCommand();
386*67e74705SXin Li     // Since we have checked for a block command, we should have parsed a
387*67e74705SXin Li     // paragraph.
388*67e74705SXin Li     Paragraph = cast<ParagraphComment>(Block);
389*67e74705SXin Li   }
390*67e74705SXin Li 
391*67e74705SXin Li   if (PC) {
392*67e74705SXin Li     S.actOnParamCommandFinish(PC, Paragraph);
393*67e74705SXin Li     return PC;
394*67e74705SXin Li   } else if (TPC) {
395*67e74705SXin Li     S.actOnTParamCommandFinish(TPC, Paragraph);
396*67e74705SXin Li     return TPC;
397*67e74705SXin Li   } else {
398*67e74705SXin Li     S.actOnBlockCommandFinish(BC, Paragraph);
399*67e74705SXin Li     return BC;
400*67e74705SXin Li   }
401*67e74705SXin Li }
402*67e74705SXin Li 
parseInlineCommand()403*67e74705SXin Li InlineCommandComment *Parser::parseInlineCommand() {
404*67e74705SXin Li   assert(Tok.is(tok::backslash_command) || Tok.is(tok::at_command));
405*67e74705SXin Li 
406*67e74705SXin Li   const Token CommandTok = Tok;
407*67e74705SXin Li   consumeToken();
408*67e74705SXin Li 
409*67e74705SXin Li   TextTokenRetokenizer Retokenizer(Allocator, *this);
410*67e74705SXin Li 
411*67e74705SXin Li   Token ArgTok;
412*67e74705SXin Li   bool ArgTokValid = Retokenizer.lexWord(ArgTok);
413*67e74705SXin Li 
414*67e74705SXin Li   InlineCommandComment *IC;
415*67e74705SXin Li   if (ArgTokValid) {
416*67e74705SXin Li     IC = S.actOnInlineCommand(CommandTok.getLocation(),
417*67e74705SXin Li                               CommandTok.getEndLocation(),
418*67e74705SXin Li                               CommandTok.getCommandID(),
419*67e74705SXin Li                               ArgTok.getLocation(),
420*67e74705SXin Li                               ArgTok.getEndLocation(),
421*67e74705SXin Li                               ArgTok.getText());
422*67e74705SXin Li   } else {
423*67e74705SXin Li     IC = S.actOnInlineCommand(CommandTok.getLocation(),
424*67e74705SXin Li                               CommandTok.getEndLocation(),
425*67e74705SXin Li                               CommandTok.getCommandID());
426*67e74705SXin Li   }
427*67e74705SXin Li 
428*67e74705SXin Li   Retokenizer.putBackLeftoverTokens();
429*67e74705SXin Li 
430*67e74705SXin Li   return IC;
431*67e74705SXin Li }
432*67e74705SXin Li 
parseHTMLStartTag()433*67e74705SXin Li HTMLStartTagComment *Parser::parseHTMLStartTag() {
434*67e74705SXin Li   assert(Tok.is(tok::html_start_tag));
435*67e74705SXin Li   HTMLStartTagComment *HST =
436*67e74705SXin Li       S.actOnHTMLStartTagStart(Tok.getLocation(),
437*67e74705SXin Li                                Tok.getHTMLTagStartName());
438*67e74705SXin Li   consumeToken();
439*67e74705SXin Li 
440*67e74705SXin Li   SmallVector<HTMLStartTagComment::Attribute, 2> Attrs;
441*67e74705SXin Li   while (true) {
442*67e74705SXin Li     switch (Tok.getKind()) {
443*67e74705SXin Li     case tok::html_ident: {
444*67e74705SXin Li       Token Ident = Tok;
445*67e74705SXin Li       consumeToken();
446*67e74705SXin Li       if (Tok.isNot(tok::html_equals)) {
447*67e74705SXin Li         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
448*67e74705SXin Li                                                        Ident.getHTMLIdent()));
449*67e74705SXin Li         continue;
450*67e74705SXin Li       }
451*67e74705SXin Li       Token Equals = Tok;
452*67e74705SXin Li       consumeToken();
453*67e74705SXin Li       if (Tok.isNot(tok::html_quoted_string)) {
454*67e74705SXin Li         Diag(Tok.getLocation(),
455*67e74705SXin Li              diag::warn_doc_html_start_tag_expected_quoted_string)
456*67e74705SXin Li           << SourceRange(Equals.getLocation());
457*67e74705SXin Li         Attrs.push_back(HTMLStartTagComment::Attribute(Ident.getLocation(),
458*67e74705SXin Li                                                        Ident.getHTMLIdent()));
459*67e74705SXin Li         while (Tok.is(tok::html_equals) ||
460*67e74705SXin Li                Tok.is(tok::html_quoted_string))
461*67e74705SXin Li           consumeToken();
462*67e74705SXin Li         continue;
463*67e74705SXin Li       }
464*67e74705SXin Li       Attrs.push_back(HTMLStartTagComment::Attribute(
465*67e74705SXin Li                               Ident.getLocation(),
466*67e74705SXin Li                               Ident.getHTMLIdent(),
467*67e74705SXin Li                               Equals.getLocation(),
468*67e74705SXin Li                               SourceRange(Tok.getLocation(),
469*67e74705SXin Li                                           Tok.getEndLocation()),
470*67e74705SXin Li                               Tok.getHTMLQuotedString()));
471*67e74705SXin Li       consumeToken();
472*67e74705SXin Li       continue;
473*67e74705SXin Li     }
474*67e74705SXin Li 
475*67e74705SXin Li     case tok::html_greater:
476*67e74705SXin Li       S.actOnHTMLStartTagFinish(HST,
477*67e74705SXin Li                                 S.copyArray(llvm::makeArrayRef(Attrs)),
478*67e74705SXin Li                                 Tok.getLocation(),
479*67e74705SXin Li                                 /* IsSelfClosing = */ false);
480*67e74705SXin Li       consumeToken();
481*67e74705SXin Li       return HST;
482*67e74705SXin Li 
483*67e74705SXin Li     case tok::html_slash_greater:
484*67e74705SXin Li       S.actOnHTMLStartTagFinish(HST,
485*67e74705SXin Li                                 S.copyArray(llvm::makeArrayRef(Attrs)),
486*67e74705SXin Li                                 Tok.getLocation(),
487*67e74705SXin Li                                 /* IsSelfClosing = */ true);
488*67e74705SXin Li       consumeToken();
489*67e74705SXin Li       return HST;
490*67e74705SXin Li 
491*67e74705SXin Li     case tok::html_equals:
492*67e74705SXin Li     case tok::html_quoted_string:
493*67e74705SXin Li       Diag(Tok.getLocation(),
494*67e74705SXin Li            diag::warn_doc_html_start_tag_expected_ident_or_greater);
495*67e74705SXin Li       while (Tok.is(tok::html_equals) ||
496*67e74705SXin Li              Tok.is(tok::html_quoted_string))
497*67e74705SXin Li         consumeToken();
498*67e74705SXin Li       if (Tok.is(tok::html_ident) ||
499*67e74705SXin Li           Tok.is(tok::html_greater) ||
500*67e74705SXin Li           Tok.is(tok::html_slash_greater))
501*67e74705SXin Li         continue;
502*67e74705SXin Li 
503*67e74705SXin Li       S.actOnHTMLStartTagFinish(HST,
504*67e74705SXin Li                                 S.copyArray(llvm::makeArrayRef(Attrs)),
505*67e74705SXin Li                                 SourceLocation(),
506*67e74705SXin Li                                 /* IsSelfClosing = */ false);
507*67e74705SXin Li       return HST;
508*67e74705SXin Li 
509*67e74705SXin Li     default:
510*67e74705SXin Li       // Not a token from an HTML start tag.  Thus HTML tag prematurely ended.
511*67e74705SXin Li       S.actOnHTMLStartTagFinish(HST,
512*67e74705SXin Li                                 S.copyArray(llvm::makeArrayRef(Attrs)),
513*67e74705SXin Li                                 SourceLocation(),
514*67e74705SXin Li                                 /* IsSelfClosing = */ false);
515*67e74705SXin Li       bool StartLineInvalid;
516*67e74705SXin Li       const unsigned StartLine = SourceMgr.getPresumedLineNumber(
517*67e74705SXin Li                                                   HST->getLocation(),
518*67e74705SXin Li                                                   &StartLineInvalid);
519*67e74705SXin Li       bool EndLineInvalid;
520*67e74705SXin Li       const unsigned EndLine = SourceMgr.getPresumedLineNumber(
521*67e74705SXin Li                                                   Tok.getLocation(),
522*67e74705SXin Li                                                   &EndLineInvalid);
523*67e74705SXin Li       if (StartLineInvalid || EndLineInvalid || StartLine == EndLine)
524*67e74705SXin Li         Diag(Tok.getLocation(),
525*67e74705SXin Li              diag::warn_doc_html_start_tag_expected_ident_or_greater)
526*67e74705SXin Li           << HST->getSourceRange();
527*67e74705SXin Li       else {
528*67e74705SXin Li         Diag(Tok.getLocation(),
529*67e74705SXin Li              diag::warn_doc_html_start_tag_expected_ident_or_greater);
530*67e74705SXin Li         Diag(HST->getLocation(), diag::note_doc_html_tag_started_here)
531*67e74705SXin Li           << HST->getSourceRange();
532*67e74705SXin Li       }
533*67e74705SXin Li       return HST;
534*67e74705SXin Li     }
535*67e74705SXin Li   }
536*67e74705SXin Li }
537*67e74705SXin Li 
parseHTMLEndTag()538*67e74705SXin Li HTMLEndTagComment *Parser::parseHTMLEndTag() {
539*67e74705SXin Li   assert(Tok.is(tok::html_end_tag));
540*67e74705SXin Li   Token TokEndTag = Tok;
541*67e74705SXin Li   consumeToken();
542*67e74705SXin Li   SourceLocation Loc;
543*67e74705SXin Li   if (Tok.is(tok::html_greater)) {
544*67e74705SXin Li     Loc = Tok.getLocation();
545*67e74705SXin Li     consumeToken();
546*67e74705SXin Li   }
547*67e74705SXin Li 
548*67e74705SXin Li   return S.actOnHTMLEndTag(TokEndTag.getLocation(),
549*67e74705SXin Li                            Loc,
550*67e74705SXin Li                            TokEndTag.getHTMLTagEndName());
551*67e74705SXin Li }
552*67e74705SXin Li 
parseParagraphOrBlockCommand()553*67e74705SXin Li BlockContentComment *Parser::parseParagraphOrBlockCommand() {
554*67e74705SXin Li   SmallVector<InlineContentComment *, 8> Content;
555*67e74705SXin Li 
556*67e74705SXin Li   while (true) {
557*67e74705SXin Li     switch (Tok.getKind()) {
558*67e74705SXin Li     case tok::verbatim_block_begin:
559*67e74705SXin Li     case tok::verbatim_line_name:
560*67e74705SXin Li     case tok::eof:
561*67e74705SXin Li       assert(Content.size() != 0);
562*67e74705SXin Li       break; // Block content or EOF ahead, finish this parapgaph.
563*67e74705SXin Li 
564*67e74705SXin Li     case tok::unknown_command:
565*67e74705SXin Li       Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
566*67e74705SXin Li                                               Tok.getEndLocation(),
567*67e74705SXin Li                                               Tok.getUnknownCommandName()));
568*67e74705SXin Li       consumeToken();
569*67e74705SXin Li       continue;
570*67e74705SXin Li 
571*67e74705SXin Li     case tok::backslash_command:
572*67e74705SXin Li     case tok::at_command: {
573*67e74705SXin Li       const CommandInfo *Info = Traits.getCommandInfo(Tok.getCommandID());
574*67e74705SXin Li       if (Info->IsBlockCommand) {
575*67e74705SXin Li         if (Content.size() == 0)
576*67e74705SXin Li           return parseBlockCommand();
577*67e74705SXin Li         break; // Block command ahead, finish this parapgaph.
578*67e74705SXin Li       }
579*67e74705SXin Li       if (Info->IsVerbatimBlockEndCommand) {
580*67e74705SXin Li         Diag(Tok.getLocation(),
581*67e74705SXin Li              diag::warn_verbatim_block_end_without_start)
582*67e74705SXin Li           << Tok.is(tok::at_command)
583*67e74705SXin Li           << Info->Name
584*67e74705SXin Li           << SourceRange(Tok.getLocation(), Tok.getEndLocation());
585*67e74705SXin Li         consumeToken();
586*67e74705SXin Li         continue;
587*67e74705SXin Li       }
588*67e74705SXin Li       if (Info->IsUnknownCommand) {
589*67e74705SXin Li         Content.push_back(S.actOnUnknownCommand(Tok.getLocation(),
590*67e74705SXin Li                                                 Tok.getEndLocation(),
591*67e74705SXin Li                                                 Info->getID()));
592*67e74705SXin Li         consumeToken();
593*67e74705SXin Li         continue;
594*67e74705SXin Li       }
595*67e74705SXin Li       assert(Info->IsInlineCommand);
596*67e74705SXin Li       Content.push_back(parseInlineCommand());
597*67e74705SXin Li       continue;
598*67e74705SXin Li     }
599*67e74705SXin Li 
600*67e74705SXin Li     case tok::newline: {
601*67e74705SXin Li       consumeToken();
602*67e74705SXin Li       if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
603*67e74705SXin Li         consumeToken();
604*67e74705SXin Li         break; // Two newlines -- end of paragraph.
605*67e74705SXin Li       }
606*67e74705SXin Li       // Also allow [tok::newline, tok::text, tok::newline] if the middle
607*67e74705SXin Li       // tok::text is just whitespace.
608*67e74705SXin Li       if (Tok.is(tok::text) && isWhitespace(Tok.getText())) {
609*67e74705SXin Li         Token WhitespaceTok = Tok;
610*67e74705SXin Li         consumeToken();
611*67e74705SXin Li         if (Tok.is(tok::newline) || Tok.is(tok::eof)) {
612*67e74705SXin Li           consumeToken();
613*67e74705SXin Li           break;
614*67e74705SXin Li         }
615*67e74705SXin Li         // We have [tok::newline, tok::text, non-newline].  Put back tok::text.
616*67e74705SXin Li         putBack(WhitespaceTok);
617*67e74705SXin Li       }
618*67e74705SXin Li       if (Content.size() > 0)
619*67e74705SXin Li         Content.back()->addTrailingNewline();
620*67e74705SXin Li       continue;
621*67e74705SXin Li     }
622*67e74705SXin Li 
623*67e74705SXin Li     // Don't deal with HTML tag soup now.
624*67e74705SXin Li     case tok::html_start_tag:
625*67e74705SXin Li       Content.push_back(parseHTMLStartTag());
626*67e74705SXin Li       continue;
627*67e74705SXin Li 
628*67e74705SXin Li     case tok::html_end_tag:
629*67e74705SXin Li       Content.push_back(parseHTMLEndTag());
630*67e74705SXin Li       continue;
631*67e74705SXin Li 
632*67e74705SXin Li     case tok::text:
633*67e74705SXin Li       Content.push_back(S.actOnText(Tok.getLocation(),
634*67e74705SXin Li                                     Tok.getEndLocation(),
635*67e74705SXin Li                                     Tok.getText()));
636*67e74705SXin Li       consumeToken();
637*67e74705SXin Li       continue;
638*67e74705SXin Li 
639*67e74705SXin Li     case tok::verbatim_block_line:
640*67e74705SXin Li     case tok::verbatim_block_end:
641*67e74705SXin Li     case tok::verbatim_line_text:
642*67e74705SXin Li     case tok::html_ident:
643*67e74705SXin Li     case tok::html_equals:
644*67e74705SXin Li     case tok::html_quoted_string:
645*67e74705SXin Li     case tok::html_greater:
646*67e74705SXin Li     case tok::html_slash_greater:
647*67e74705SXin Li       llvm_unreachable("should not see this token");
648*67e74705SXin Li     }
649*67e74705SXin Li     break;
650*67e74705SXin Li   }
651*67e74705SXin Li 
652*67e74705SXin Li   return S.actOnParagraphComment(S.copyArray(llvm::makeArrayRef(Content)));
653*67e74705SXin Li }
654*67e74705SXin Li 
parseVerbatimBlock()655*67e74705SXin Li VerbatimBlockComment *Parser::parseVerbatimBlock() {
656*67e74705SXin Li   assert(Tok.is(tok::verbatim_block_begin));
657*67e74705SXin Li 
658*67e74705SXin Li   VerbatimBlockComment *VB =
659*67e74705SXin Li       S.actOnVerbatimBlockStart(Tok.getLocation(),
660*67e74705SXin Li                                 Tok.getVerbatimBlockID());
661*67e74705SXin Li   consumeToken();
662*67e74705SXin Li 
663*67e74705SXin Li   // Don't create an empty line if verbatim opening command is followed
664*67e74705SXin Li   // by a newline.
665*67e74705SXin Li   if (Tok.is(tok::newline))
666*67e74705SXin Li     consumeToken();
667*67e74705SXin Li 
668*67e74705SXin Li   SmallVector<VerbatimBlockLineComment *, 8> Lines;
669*67e74705SXin Li   while (Tok.is(tok::verbatim_block_line) ||
670*67e74705SXin Li          Tok.is(tok::newline)) {
671*67e74705SXin Li     VerbatimBlockLineComment *Line;
672*67e74705SXin Li     if (Tok.is(tok::verbatim_block_line)) {
673*67e74705SXin Li       Line = S.actOnVerbatimBlockLine(Tok.getLocation(),
674*67e74705SXin Li                                       Tok.getVerbatimBlockText());
675*67e74705SXin Li       consumeToken();
676*67e74705SXin Li       if (Tok.is(tok::newline)) {
677*67e74705SXin Li         consumeToken();
678*67e74705SXin Li       }
679*67e74705SXin Li     } else {
680*67e74705SXin Li       // Empty line, just a tok::newline.
681*67e74705SXin Li       Line = S.actOnVerbatimBlockLine(Tok.getLocation(), "");
682*67e74705SXin Li       consumeToken();
683*67e74705SXin Li     }
684*67e74705SXin Li     Lines.push_back(Line);
685*67e74705SXin Li   }
686*67e74705SXin Li 
687*67e74705SXin Li   if (Tok.is(tok::verbatim_block_end)) {
688*67e74705SXin Li     const CommandInfo *Info = Traits.getCommandInfo(Tok.getVerbatimBlockID());
689*67e74705SXin Li     S.actOnVerbatimBlockFinish(VB, Tok.getLocation(),
690*67e74705SXin Li                                Info->Name,
691*67e74705SXin Li                                S.copyArray(llvm::makeArrayRef(Lines)));
692*67e74705SXin Li     consumeToken();
693*67e74705SXin Li   } else {
694*67e74705SXin Li     // Unterminated \\verbatim block
695*67e74705SXin Li     S.actOnVerbatimBlockFinish(VB, SourceLocation(), "",
696*67e74705SXin Li                                S.copyArray(llvm::makeArrayRef(Lines)));
697*67e74705SXin Li   }
698*67e74705SXin Li 
699*67e74705SXin Li   return VB;
700*67e74705SXin Li }
701*67e74705SXin Li 
parseVerbatimLine()702*67e74705SXin Li VerbatimLineComment *Parser::parseVerbatimLine() {
703*67e74705SXin Li   assert(Tok.is(tok::verbatim_line_name));
704*67e74705SXin Li 
705*67e74705SXin Li   Token NameTok = Tok;
706*67e74705SXin Li   consumeToken();
707*67e74705SXin Li 
708*67e74705SXin Li   SourceLocation TextBegin;
709*67e74705SXin Li   StringRef Text;
710*67e74705SXin Li   // Next token might not be a tok::verbatim_line_text if verbatim line
711*67e74705SXin Li   // starting command comes just before a newline or comment end.
712*67e74705SXin Li   if (Tok.is(tok::verbatim_line_text)) {
713*67e74705SXin Li     TextBegin = Tok.getLocation();
714*67e74705SXin Li     Text = Tok.getVerbatimLineText();
715*67e74705SXin Li   } else {
716*67e74705SXin Li     TextBegin = NameTok.getEndLocation();
717*67e74705SXin Li     Text = "";
718*67e74705SXin Li   }
719*67e74705SXin Li 
720*67e74705SXin Li   VerbatimLineComment *VL = S.actOnVerbatimLine(NameTok.getLocation(),
721*67e74705SXin Li                                                 NameTok.getVerbatimLineID(),
722*67e74705SXin Li                                                 TextBegin,
723*67e74705SXin Li                                                 Text);
724*67e74705SXin Li   consumeToken();
725*67e74705SXin Li   return VL;
726*67e74705SXin Li }
727*67e74705SXin Li 
parseBlockContent()728*67e74705SXin Li BlockContentComment *Parser::parseBlockContent() {
729*67e74705SXin Li   switch (Tok.getKind()) {
730*67e74705SXin Li   case tok::text:
731*67e74705SXin Li   case tok::unknown_command:
732*67e74705SXin Li   case tok::backslash_command:
733*67e74705SXin Li   case tok::at_command:
734*67e74705SXin Li   case tok::html_start_tag:
735*67e74705SXin Li   case tok::html_end_tag:
736*67e74705SXin Li     return parseParagraphOrBlockCommand();
737*67e74705SXin Li 
738*67e74705SXin Li   case tok::verbatim_block_begin:
739*67e74705SXin Li     return parseVerbatimBlock();
740*67e74705SXin Li 
741*67e74705SXin Li   case tok::verbatim_line_name:
742*67e74705SXin Li     return parseVerbatimLine();
743*67e74705SXin Li 
744*67e74705SXin Li   case tok::eof:
745*67e74705SXin Li   case tok::newline:
746*67e74705SXin Li   case tok::verbatim_block_line:
747*67e74705SXin Li   case tok::verbatim_block_end:
748*67e74705SXin Li   case tok::verbatim_line_text:
749*67e74705SXin Li   case tok::html_ident:
750*67e74705SXin Li   case tok::html_equals:
751*67e74705SXin Li   case tok::html_quoted_string:
752*67e74705SXin Li   case tok::html_greater:
753*67e74705SXin Li   case tok::html_slash_greater:
754*67e74705SXin Li     llvm_unreachable("should not see this token");
755*67e74705SXin Li   }
756*67e74705SXin Li   llvm_unreachable("bogus token kind");
757*67e74705SXin Li }
758*67e74705SXin Li 
parseFullComment()759*67e74705SXin Li FullComment *Parser::parseFullComment() {
760*67e74705SXin Li   // Skip newlines at the beginning of the comment.
761*67e74705SXin Li   while (Tok.is(tok::newline))
762*67e74705SXin Li     consumeToken();
763*67e74705SXin Li 
764*67e74705SXin Li   SmallVector<BlockContentComment *, 8> Blocks;
765*67e74705SXin Li   while (Tok.isNot(tok::eof)) {
766*67e74705SXin Li     Blocks.push_back(parseBlockContent());
767*67e74705SXin Li 
768*67e74705SXin Li     // Skip extra newlines after paragraph end.
769*67e74705SXin Li     while (Tok.is(tok::newline))
770*67e74705SXin Li       consumeToken();
771*67e74705SXin Li   }
772*67e74705SXin Li   return S.actOnFullComment(S.copyArray(llvm::makeArrayRef(Blocks)));
773*67e74705SXin Li }
774*67e74705SXin Li 
775*67e74705SXin Li } // end namespace comments
776*67e74705SXin Li } // end namespace clang
777