1*67e74705SXin Li //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2*67e74705SXin Li //
3*67e74705SXin Li // The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li
10*67e74705SXin Li #include "clang/AST/RawCommentList.h"
11*67e74705SXin Li #include "clang/AST/ASTContext.h"
12*67e74705SXin Li #include "clang/AST/Comment.h"
13*67e74705SXin Li #include "clang/AST/CommentBriefParser.h"
14*67e74705SXin Li #include "clang/AST/CommentCommandTraits.h"
15*67e74705SXin Li #include "clang/AST/CommentLexer.h"
16*67e74705SXin Li #include "clang/AST/CommentParser.h"
17*67e74705SXin Li #include "clang/AST/CommentSema.h"
18*67e74705SXin Li #include "clang/Basic/CharInfo.h"
19*67e74705SXin Li #include "llvm/ADT/STLExtras.h"
20*67e74705SXin Li
21*67e74705SXin Li using namespace clang;
22*67e74705SXin Li
23*67e74705SXin Li namespace {
24*67e74705SXin Li /// Get comment kind and bool describing if it is a trailing comment.
getCommentKind(StringRef Comment,bool ParseAllComments)25*67e74705SXin Li std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
26*67e74705SXin Li bool ParseAllComments) {
27*67e74705SXin Li const size_t MinCommentLength = ParseAllComments ? 2 : 3;
28*67e74705SXin Li if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
29*67e74705SXin Li return std::make_pair(RawComment::RCK_Invalid, false);
30*67e74705SXin Li
31*67e74705SXin Li RawComment::CommentKind K;
32*67e74705SXin Li if (Comment[1] == '/') {
33*67e74705SXin Li if (Comment.size() < 3)
34*67e74705SXin Li return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
35*67e74705SXin Li
36*67e74705SXin Li if (Comment[2] == '/')
37*67e74705SXin Li K = RawComment::RCK_BCPLSlash;
38*67e74705SXin Li else if (Comment[2] == '!')
39*67e74705SXin Li K = RawComment::RCK_BCPLExcl;
40*67e74705SXin Li else
41*67e74705SXin Li return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
42*67e74705SXin Li } else {
43*67e74705SXin Li assert(Comment.size() >= 4);
44*67e74705SXin Li
45*67e74705SXin Li // Comment lexer does not understand escapes in comment markers, so pretend
46*67e74705SXin Li // that this is not a comment.
47*67e74705SXin Li if (Comment[1] != '*' ||
48*67e74705SXin Li Comment[Comment.size() - 2] != '*' ||
49*67e74705SXin Li Comment[Comment.size() - 1] != '/')
50*67e74705SXin Li return std::make_pair(RawComment::RCK_Invalid, false);
51*67e74705SXin Li
52*67e74705SXin Li if (Comment[2] == '*')
53*67e74705SXin Li K = RawComment::RCK_JavaDoc;
54*67e74705SXin Li else if (Comment[2] == '!')
55*67e74705SXin Li K = RawComment::RCK_Qt;
56*67e74705SXin Li else
57*67e74705SXin Li return std::make_pair(RawComment::RCK_OrdinaryC, false);
58*67e74705SXin Li }
59*67e74705SXin Li const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
60*67e74705SXin Li return std::make_pair(K, TrailingComment);
61*67e74705SXin Li }
62*67e74705SXin Li
mergedCommentIsTrailingComment(StringRef Comment)63*67e74705SXin Li bool mergedCommentIsTrailingComment(StringRef Comment) {
64*67e74705SXin Li return (Comment.size() > 3) && (Comment[3] == '<');
65*67e74705SXin Li }
66*67e74705SXin Li
67*67e74705SXin Li /// Returns true if R1 and R2 both have valid locations that start on the same
68*67e74705SXin Li /// column.
commentsStartOnSameColumn(const SourceManager & SM,const RawComment & R1,const RawComment & R2)69*67e74705SXin Li bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
70*67e74705SXin Li const RawComment &R2) {
71*67e74705SXin Li SourceLocation L1 = R1.getLocStart();
72*67e74705SXin Li SourceLocation L2 = R2.getLocStart();
73*67e74705SXin Li bool Invalid = false;
74*67e74705SXin Li unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
75*67e74705SXin Li if (!Invalid) {
76*67e74705SXin Li unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
77*67e74705SXin Li return !Invalid && (C1 == C2);
78*67e74705SXin Li }
79*67e74705SXin Li return false;
80*67e74705SXin Li }
81*67e74705SXin Li } // unnamed namespace
82*67e74705SXin Li
83*67e74705SXin Li /// \brief Determines whether there is only whitespace in `Buffer` between `P`
84*67e74705SXin Li /// and the previous line.
85*67e74705SXin Li /// \param Buffer The buffer to search in.
86*67e74705SXin Li /// \param P The offset from the beginning of `Buffer` to start from.
87*67e74705SXin Li /// \return true if all of the characters in `Buffer` ranging from the closest
88*67e74705SXin Li /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
89*67e74705SXin Li /// are whitespace.
onlyWhitespaceOnLineBefore(const char * Buffer,unsigned P)90*67e74705SXin Li static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
91*67e74705SXin Li // Search backwards until we see linefeed or carriage return.
92*67e74705SXin Li for (unsigned I = P; I != 0; --I) {
93*67e74705SXin Li char C = Buffer[I - 1];
94*67e74705SXin Li if (isVerticalWhitespace(C))
95*67e74705SXin Li return true;
96*67e74705SXin Li if (!isHorizontalWhitespace(C))
97*67e74705SXin Li return false;
98*67e74705SXin Li }
99*67e74705SXin Li // We hit the beginning of the buffer.
100*67e74705SXin Li return true;
101*67e74705SXin Li }
102*67e74705SXin Li
103*67e74705SXin Li /// Returns whether `K` is an ordinary comment kind.
isOrdinaryKind(RawComment::CommentKind K)104*67e74705SXin Li static bool isOrdinaryKind(RawComment::CommentKind K) {
105*67e74705SXin Li return (K == RawComment::RCK_OrdinaryBCPL) ||
106*67e74705SXin Li (K == RawComment::RCK_OrdinaryC);
107*67e74705SXin Li }
108*67e74705SXin Li
RawComment(const SourceManager & SourceMgr,SourceRange SR,bool Merged,bool ParseAllComments)109*67e74705SXin Li RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
110*67e74705SXin Li bool Merged, bool ParseAllComments) :
111*67e74705SXin Li Range(SR), RawTextValid(false), BriefTextValid(false),
112*67e74705SXin Li IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
113*67e74705SXin Li ParseAllComments(ParseAllComments) {
114*67e74705SXin Li // Extract raw comment text, if possible.
115*67e74705SXin Li if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
116*67e74705SXin Li Kind = RCK_Invalid;
117*67e74705SXin Li return;
118*67e74705SXin Li }
119*67e74705SXin Li
120*67e74705SXin Li // Guess comment kind.
121*67e74705SXin Li std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
122*67e74705SXin Li
123*67e74705SXin Li // Guess whether an ordinary comment is trailing.
124*67e74705SXin Li if (ParseAllComments && isOrdinaryKind(K.first)) {
125*67e74705SXin Li FileID BeginFileID;
126*67e74705SXin Li unsigned BeginOffset;
127*67e74705SXin Li std::tie(BeginFileID, BeginOffset) =
128*67e74705SXin Li SourceMgr.getDecomposedLoc(Range.getBegin());
129*67e74705SXin Li if (BeginOffset != 0) {
130*67e74705SXin Li bool Invalid = false;
131*67e74705SXin Li const char *Buffer =
132*67e74705SXin Li SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133*67e74705SXin Li IsTrailingComment |=
134*67e74705SXin Li (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135*67e74705SXin Li }
136*67e74705SXin Li }
137*67e74705SXin Li
138*67e74705SXin Li if (!Merged) {
139*67e74705SXin Li Kind = K.first;
140*67e74705SXin Li IsTrailingComment |= K.second;
141*67e74705SXin Li
142*67e74705SXin Li IsAlmostTrailingComment = RawText.startswith("//<") ||
143*67e74705SXin Li RawText.startswith("/*<");
144*67e74705SXin Li } else {
145*67e74705SXin Li Kind = RCK_Merged;
146*67e74705SXin Li IsTrailingComment =
147*67e74705SXin Li IsTrailingComment || mergedCommentIsTrailingComment(RawText);
148*67e74705SXin Li }
149*67e74705SXin Li }
150*67e74705SXin Li
getRawTextSlow(const SourceManager & SourceMgr) const151*67e74705SXin Li StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152*67e74705SXin Li FileID BeginFileID;
153*67e74705SXin Li FileID EndFileID;
154*67e74705SXin Li unsigned BeginOffset;
155*67e74705SXin Li unsigned EndOffset;
156*67e74705SXin Li
157*67e74705SXin Li std::tie(BeginFileID, BeginOffset) =
158*67e74705SXin Li SourceMgr.getDecomposedLoc(Range.getBegin());
159*67e74705SXin Li std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
160*67e74705SXin Li
161*67e74705SXin Li const unsigned Length = EndOffset - BeginOffset;
162*67e74705SXin Li if (Length < 2)
163*67e74705SXin Li return StringRef();
164*67e74705SXin Li
165*67e74705SXin Li // The comment can't begin in one file and end in another.
166*67e74705SXin Li assert(BeginFileID == EndFileID);
167*67e74705SXin Li
168*67e74705SXin Li bool Invalid = false;
169*67e74705SXin Li const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170*67e74705SXin Li &Invalid).data();
171*67e74705SXin Li if (Invalid)
172*67e74705SXin Li return StringRef();
173*67e74705SXin Li
174*67e74705SXin Li return StringRef(BufferStart + BeginOffset, Length);
175*67e74705SXin Li }
176*67e74705SXin Li
extractBriefText(const ASTContext & Context) const177*67e74705SXin Li const char *RawComment::extractBriefText(const ASTContext &Context) const {
178*67e74705SXin Li // Make sure that RawText is valid.
179*67e74705SXin Li getRawText(Context.getSourceManager());
180*67e74705SXin Li
181*67e74705SXin Li // Since we will be copying the resulting text, all allocations made during
182*67e74705SXin Li // parsing are garbage after resulting string is formed. Thus we can use
183*67e74705SXin Li // a separate allocator for all temporary stuff.
184*67e74705SXin Li llvm::BumpPtrAllocator Allocator;
185*67e74705SXin Li
186*67e74705SXin Li comments::Lexer L(Allocator, Context.getDiagnostics(),
187*67e74705SXin Li Context.getCommentCommandTraits(),
188*67e74705SXin Li Range.getBegin(),
189*67e74705SXin Li RawText.begin(), RawText.end());
190*67e74705SXin Li comments::BriefParser P(L, Context.getCommentCommandTraits());
191*67e74705SXin Li
192*67e74705SXin Li const std::string Result = P.Parse();
193*67e74705SXin Li const unsigned BriefTextLength = Result.size();
194*67e74705SXin Li char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195*67e74705SXin Li memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196*67e74705SXin Li BriefText = BriefTextPtr;
197*67e74705SXin Li BriefTextValid = true;
198*67e74705SXin Li
199*67e74705SXin Li return BriefTextPtr;
200*67e74705SXin Li }
201*67e74705SXin Li
parse(const ASTContext & Context,const Preprocessor * PP,const Decl * D) const202*67e74705SXin Li comments::FullComment *RawComment::parse(const ASTContext &Context,
203*67e74705SXin Li const Preprocessor *PP,
204*67e74705SXin Li const Decl *D) const {
205*67e74705SXin Li // Make sure that RawText is valid.
206*67e74705SXin Li getRawText(Context.getSourceManager());
207*67e74705SXin Li
208*67e74705SXin Li comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
209*67e74705SXin Li Context.getCommentCommandTraits(),
210*67e74705SXin Li getSourceRange().getBegin(),
211*67e74705SXin Li RawText.begin(), RawText.end());
212*67e74705SXin Li comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
213*67e74705SXin Li Context.getDiagnostics(),
214*67e74705SXin Li Context.getCommentCommandTraits(),
215*67e74705SXin Li PP);
216*67e74705SXin Li S.setDecl(D);
217*67e74705SXin Li comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
218*67e74705SXin Li Context.getDiagnostics(),
219*67e74705SXin Li Context.getCommentCommandTraits());
220*67e74705SXin Li
221*67e74705SXin Li return P.parseFullComment();
222*67e74705SXin Li }
223*67e74705SXin Li
onlyWhitespaceBetween(SourceManager & SM,SourceLocation Loc1,SourceLocation Loc2,unsigned MaxNewlinesAllowed)224*67e74705SXin Li static bool onlyWhitespaceBetween(SourceManager &SM,
225*67e74705SXin Li SourceLocation Loc1, SourceLocation Loc2,
226*67e74705SXin Li unsigned MaxNewlinesAllowed) {
227*67e74705SXin Li std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228*67e74705SXin Li std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
229*67e74705SXin Li
230*67e74705SXin Li // Question does not make sense if locations are in different files.
231*67e74705SXin Li if (Loc1Info.first != Loc2Info.first)
232*67e74705SXin Li return false;
233*67e74705SXin Li
234*67e74705SXin Li bool Invalid = false;
235*67e74705SXin Li const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
236*67e74705SXin Li if (Invalid)
237*67e74705SXin Li return false;
238*67e74705SXin Li
239*67e74705SXin Li unsigned NumNewlines = 0;
240*67e74705SXin Li assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241*67e74705SXin Li // Look for non-whitespace characters and remember any newlines seen.
242*67e74705SXin Li for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
243*67e74705SXin Li switch (Buffer[I]) {
244*67e74705SXin Li default:
245*67e74705SXin Li return false;
246*67e74705SXin Li case ' ':
247*67e74705SXin Li case '\t':
248*67e74705SXin Li case '\f':
249*67e74705SXin Li case '\v':
250*67e74705SXin Li break;
251*67e74705SXin Li case '\r':
252*67e74705SXin Li case '\n':
253*67e74705SXin Li ++NumNewlines;
254*67e74705SXin Li
255*67e74705SXin Li // Check if we have found more than the maximum allowed number of
256*67e74705SXin Li // newlines.
257*67e74705SXin Li if (NumNewlines > MaxNewlinesAllowed)
258*67e74705SXin Li return false;
259*67e74705SXin Li
260*67e74705SXin Li // Collapse \r\n and \n\r into a single newline.
261*67e74705SXin Li if (I + 1 != Loc2Info.second &&
262*67e74705SXin Li (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
263*67e74705SXin Li Buffer[I] != Buffer[I + 1])
264*67e74705SXin Li ++I;
265*67e74705SXin Li break;
266*67e74705SXin Li }
267*67e74705SXin Li }
268*67e74705SXin Li
269*67e74705SXin Li return true;
270*67e74705SXin Li }
271*67e74705SXin Li
addComment(const RawComment & RC,llvm::BumpPtrAllocator & Allocator)272*67e74705SXin Li void RawCommentList::addComment(const RawComment &RC,
273*67e74705SXin Li llvm::BumpPtrAllocator &Allocator) {
274*67e74705SXin Li if (RC.isInvalid())
275*67e74705SXin Li return;
276*67e74705SXin Li
277*67e74705SXin Li // Check if the comments are not in source order.
278*67e74705SXin Li while (!Comments.empty() &&
279*67e74705SXin Li !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(),
280*67e74705SXin Li RC.getLocStart())) {
281*67e74705SXin Li // If they are, just pop a few last comments that don't fit.
282*67e74705SXin Li // This happens if an \#include directive contains comments.
283*67e74705SXin Li Comments.pop_back();
284*67e74705SXin Li }
285*67e74705SXin Li
286*67e74705SXin Li // Ordinary comments are not interesting for us.
287*67e74705SXin Li if (RC.isOrdinary())
288*67e74705SXin Li return;
289*67e74705SXin Li
290*67e74705SXin Li // If this is the first Doxygen comment, save it (because there isn't
291*67e74705SXin Li // anything to merge it with).
292*67e74705SXin Li if (Comments.empty()) {
293*67e74705SXin Li Comments.push_back(new (Allocator) RawComment(RC));
294*67e74705SXin Li return;
295*67e74705SXin Li }
296*67e74705SXin Li
297*67e74705SXin Li const RawComment &C1 = *Comments.back();
298*67e74705SXin Li const RawComment &C2 = RC;
299*67e74705SXin Li
300*67e74705SXin Li // Merge comments only if there is only whitespace between them.
301*67e74705SXin Li // Can't merge trailing and non-trailing comments unless the second is
302*67e74705SXin Li // non-trailing ordinary in the same column, as in the case:
303*67e74705SXin Li // int x; // documents x
304*67e74705SXin Li // // more text
305*67e74705SXin Li // versus:
306*67e74705SXin Li // int x; // documents x
307*67e74705SXin Li // int y; // documents y
308*67e74705SXin Li // or:
309*67e74705SXin Li // int x; // documents x
310*67e74705SXin Li // // documents y
311*67e74705SXin Li // int y;
312*67e74705SXin Li // Merge comments if they are on same or consecutive lines.
313*67e74705SXin Li if ((C1.isTrailingComment() == C2.isTrailingComment() ||
314*67e74705SXin Li (C1.isTrailingComment() && !C2.isTrailingComment() &&
315*67e74705SXin Li isOrdinaryKind(C2.getKind()) &&
316*67e74705SXin Li commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
317*67e74705SXin Li onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
318*67e74705SXin Li /*MaxNewlinesAllowed=*/1)) {
319*67e74705SXin Li SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
320*67e74705SXin Li *Comments.back() = RawComment(SourceMgr, MergedRange, true,
321*67e74705SXin Li RC.isParseAllComments());
322*67e74705SXin Li } else {
323*67e74705SXin Li Comments.push_back(new (Allocator) RawComment(RC));
324*67e74705SXin Li }
325*67e74705SXin Li }
326*67e74705SXin Li
addDeserializedComments(ArrayRef<RawComment * > DeserializedComments)327*67e74705SXin Li void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
328*67e74705SXin Li std::vector<RawComment *> MergedComments;
329*67e74705SXin Li MergedComments.reserve(Comments.size() + DeserializedComments.size());
330*67e74705SXin Li
331*67e74705SXin Li std::merge(Comments.begin(), Comments.end(),
332*67e74705SXin Li DeserializedComments.begin(), DeserializedComments.end(),
333*67e74705SXin Li std::back_inserter(MergedComments),
334*67e74705SXin Li BeforeThanCompare<RawComment>(SourceMgr));
335*67e74705SXin Li std::swap(Comments, MergedComments);
336*67e74705SXin Li }
337*67e74705SXin Li
338