xref: /aosp_15_r20/external/clang/lib/AST/RawCommentList.cpp (revision 67e74705e28f6214e480b399dd47ea732279e315)
1*67e74705SXin Li //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2*67e74705SXin Li //
3*67e74705SXin Li //                     The LLVM Compiler Infrastructure
4*67e74705SXin Li //
5*67e74705SXin Li // This file is distributed under the University of Illinois Open Source
6*67e74705SXin Li // License. See LICENSE.TXT for details.
7*67e74705SXin Li //
8*67e74705SXin Li //===----------------------------------------------------------------------===//
9*67e74705SXin Li 
10*67e74705SXin Li #include "clang/AST/RawCommentList.h"
11*67e74705SXin Li #include "clang/AST/ASTContext.h"
12*67e74705SXin Li #include "clang/AST/Comment.h"
13*67e74705SXin Li #include "clang/AST/CommentBriefParser.h"
14*67e74705SXin Li #include "clang/AST/CommentCommandTraits.h"
15*67e74705SXin Li #include "clang/AST/CommentLexer.h"
16*67e74705SXin Li #include "clang/AST/CommentParser.h"
17*67e74705SXin Li #include "clang/AST/CommentSema.h"
18*67e74705SXin Li #include "clang/Basic/CharInfo.h"
19*67e74705SXin Li #include "llvm/ADT/STLExtras.h"
20*67e74705SXin Li 
21*67e74705SXin Li using namespace clang;
22*67e74705SXin Li 
23*67e74705SXin Li namespace {
24*67e74705SXin Li /// Get comment kind and bool describing if it is a trailing comment.
getCommentKind(StringRef Comment,bool ParseAllComments)25*67e74705SXin Li std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
26*67e74705SXin Li                                                         bool ParseAllComments) {
27*67e74705SXin Li   const size_t MinCommentLength = ParseAllComments ? 2 : 3;
28*67e74705SXin Li   if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
29*67e74705SXin Li     return std::make_pair(RawComment::RCK_Invalid, false);
30*67e74705SXin Li 
31*67e74705SXin Li   RawComment::CommentKind K;
32*67e74705SXin Li   if (Comment[1] == '/') {
33*67e74705SXin Li     if (Comment.size() < 3)
34*67e74705SXin Li       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
35*67e74705SXin Li 
36*67e74705SXin Li     if (Comment[2] == '/')
37*67e74705SXin Li       K = RawComment::RCK_BCPLSlash;
38*67e74705SXin Li     else if (Comment[2] == '!')
39*67e74705SXin Li       K = RawComment::RCK_BCPLExcl;
40*67e74705SXin Li     else
41*67e74705SXin Li       return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
42*67e74705SXin Li   } else {
43*67e74705SXin Li     assert(Comment.size() >= 4);
44*67e74705SXin Li 
45*67e74705SXin Li     // Comment lexer does not understand escapes in comment markers, so pretend
46*67e74705SXin Li     // that this is not a comment.
47*67e74705SXin Li     if (Comment[1] != '*' ||
48*67e74705SXin Li         Comment[Comment.size() - 2] != '*' ||
49*67e74705SXin Li         Comment[Comment.size() - 1] != '/')
50*67e74705SXin Li       return std::make_pair(RawComment::RCK_Invalid, false);
51*67e74705SXin Li 
52*67e74705SXin Li     if (Comment[2] == '*')
53*67e74705SXin Li       K = RawComment::RCK_JavaDoc;
54*67e74705SXin Li     else if (Comment[2] == '!')
55*67e74705SXin Li       K = RawComment::RCK_Qt;
56*67e74705SXin Li     else
57*67e74705SXin Li       return std::make_pair(RawComment::RCK_OrdinaryC, false);
58*67e74705SXin Li   }
59*67e74705SXin Li   const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
60*67e74705SXin Li   return std::make_pair(K, TrailingComment);
61*67e74705SXin Li }
62*67e74705SXin Li 
mergedCommentIsTrailingComment(StringRef Comment)63*67e74705SXin Li bool mergedCommentIsTrailingComment(StringRef Comment) {
64*67e74705SXin Li   return (Comment.size() > 3) && (Comment[3] == '<');
65*67e74705SXin Li }
66*67e74705SXin Li 
67*67e74705SXin Li /// Returns true if R1 and R2 both have valid locations that start on the same
68*67e74705SXin Li /// column.
commentsStartOnSameColumn(const SourceManager & SM,const RawComment & R1,const RawComment & R2)69*67e74705SXin Li bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
70*67e74705SXin Li                                const RawComment &R2) {
71*67e74705SXin Li   SourceLocation L1 = R1.getLocStart();
72*67e74705SXin Li   SourceLocation L2 = R2.getLocStart();
73*67e74705SXin Li   bool Invalid = false;
74*67e74705SXin Li   unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
75*67e74705SXin Li   if (!Invalid) {
76*67e74705SXin Li     unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
77*67e74705SXin Li     return !Invalid && (C1 == C2);
78*67e74705SXin Li   }
79*67e74705SXin Li   return false;
80*67e74705SXin Li }
81*67e74705SXin Li } // unnamed namespace
82*67e74705SXin Li 
83*67e74705SXin Li /// \brief Determines whether there is only whitespace in `Buffer` between `P`
84*67e74705SXin Li /// and the previous line.
85*67e74705SXin Li /// \param Buffer The buffer to search in.
86*67e74705SXin Li /// \param P The offset from the beginning of `Buffer` to start from.
87*67e74705SXin Li /// \return true if all of the characters in `Buffer` ranging from the closest
88*67e74705SXin Li /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
89*67e74705SXin Li /// are whitespace.
onlyWhitespaceOnLineBefore(const char * Buffer,unsigned P)90*67e74705SXin Li static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
91*67e74705SXin Li   // Search backwards until we see linefeed or carriage return.
92*67e74705SXin Li   for (unsigned I = P; I != 0; --I) {
93*67e74705SXin Li     char C = Buffer[I - 1];
94*67e74705SXin Li     if (isVerticalWhitespace(C))
95*67e74705SXin Li       return true;
96*67e74705SXin Li     if (!isHorizontalWhitespace(C))
97*67e74705SXin Li       return false;
98*67e74705SXin Li   }
99*67e74705SXin Li   // We hit the beginning of the buffer.
100*67e74705SXin Li   return true;
101*67e74705SXin Li }
102*67e74705SXin Li 
103*67e74705SXin Li /// Returns whether `K` is an ordinary comment kind.
isOrdinaryKind(RawComment::CommentKind K)104*67e74705SXin Li static bool isOrdinaryKind(RawComment::CommentKind K) {
105*67e74705SXin Li   return (K == RawComment::RCK_OrdinaryBCPL) ||
106*67e74705SXin Li          (K == RawComment::RCK_OrdinaryC);
107*67e74705SXin Li }
108*67e74705SXin Li 
RawComment(const SourceManager & SourceMgr,SourceRange SR,bool Merged,bool ParseAllComments)109*67e74705SXin Li RawComment::RawComment(const SourceManager &SourceMgr, SourceRange SR,
110*67e74705SXin Li                        bool Merged, bool ParseAllComments) :
111*67e74705SXin Li     Range(SR), RawTextValid(false), BriefTextValid(false),
112*67e74705SXin Li     IsAttached(false), IsTrailingComment(false), IsAlmostTrailingComment(false),
113*67e74705SXin Li     ParseAllComments(ParseAllComments) {
114*67e74705SXin Li   // Extract raw comment text, if possible.
115*67e74705SXin Li   if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
116*67e74705SXin Li     Kind = RCK_Invalid;
117*67e74705SXin Li     return;
118*67e74705SXin Li   }
119*67e74705SXin Li 
120*67e74705SXin Li   // Guess comment kind.
121*67e74705SXin Li   std::pair<CommentKind, bool> K = getCommentKind(RawText, ParseAllComments);
122*67e74705SXin Li 
123*67e74705SXin Li   // Guess whether an ordinary comment is trailing.
124*67e74705SXin Li   if (ParseAllComments && isOrdinaryKind(K.first)) {
125*67e74705SXin Li     FileID BeginFileID;
126*67e74705SXin Li     unsigned BeginOffset;
127*67e74705SXin Li     std::tie(BeginFileID, BeginOffset) =
128*67e74705SXin Li         SourceMgr.getDecomposedLoc(Range.getBegin());
129*67e74705SXin Li     if (BeginOffset != 0) {
130*67e74705SXin Li       bool Invalid = false;
131*67e74705SXin Li       const char *Buffer =
132*67e74705SXin Li           SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133*67e74705SXin Li       IsTrailingComment |=
134*67e74705SXin Li           (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135*67e74705SXin Li     }
136*67e74705SXin Li   }
137*67e74705SXin Li 
138*67e74705SXin Li   if (!Merged) {
139*67e74705SXin Li     Kind = K.first;
140*67e74705SXin Li     IsTrailingComment |= K.second;
141*67e74705SXin Li 
142*67e74705SXin Li     IsAlmostTrailingComment = RawText.startswith("//<") ||
143*67e74705SXin Li                                  RawText.startswith("/*<");
144*67e74705SXin Li   } else {
145*67e74705SXin Li     Kind = RCK_Merged;
146*67e74705SXin Li     IsTrailingComment =
147*67e74705SXin Li         IsTrailingComment || mergedCommentIsTrailingComment(RawText);
148*67e74705SXin Li   }
149*67e74705SXin Li }
150*67e74705SXin Li 
getRawTextSlow(const SourceManager & SourceMgr) const151*67e74705SXin Li StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152*67e74705SXin Li   FileID BeginFileID;
153*67e74705SXin Li   FileID EndFileID;
154*67e74705SXin Li   unsigned BeginOffset;
155*67e74705SXin Li   unsigned EndOffset;
156*67e74705SXin Li 
157*67e74705SXin Li   std::tie(BeginFileID, BeginOffset) =
158*67e74705SXin Li       SourceMgr.getDecomposedLoc(Range.getBegin());
159*67e74705SXin Li   std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
160*67e74705SXin Li 
161*67e74705SXin Li   const unsigned Length = EndOffset - BeginOffset;
162*67e74705SXin Li   if (Length < 2)
163*67e74705SXin Li     return StringRef();
164*67e74705SXin Li 
165*67e74705SXin Li   // The comment can't begin in one file and end in another.
166*67e74705SXin Li   assert(BeginFileID == EndFileID);
167*67e74705SXin Li 
168*67e74705SXin Li   bool Invalid = false;
169*67e74705SXin Li   const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170*67e74705SXin Li                                                     &Invalid).data();
171*67e74705SXin Li   if (Invalid)
172*67e74705SXin Li     return StringRef();
173*67e74705SXin Li 
174*67e74705SXin Li   return StringRef(BufferStart + BeginOffset, Length);
175*67e74705SXin Li }
176*67e74705SXin Li 
extractBriefText(const ASTContext & Context) const177*67e74705SXin Li const char *RawComment::extractBriefText(const ASTContext &Context) const {
178*67e74705SXin Li   // Make sure that RawText is valid.
179*67e74705SXin Li   getRawText(Context.getSourceManager());
180*67e74705SXin Li 
181*67e74705SXin Li   // Since we will be copying the resulting text, all allocations made during
182*67e74705SXin Li   // parsing are garbage after resulting string is formed.  Thus we can use
183*67e74705SXin Li   // a separate allocator for all temporary stuff.
184*67e74705SXin Li   llvm::BumpPtrAllocator Allocator;
185*67e74705SXin Li 
186*67e74705SXin Li   comments::Lexer L(Allocator, Context.getDiagnostics(),
187*67e74705SXin Li                     Context.getCommentCommandTraits(),
188*67e74705SXin Li                     Range.getBegin(),
189*67e74705SXin Li                     RawText.begin(), RawText.end());
190*67e74705SXin Li   comments::BriefParser P(L, Context.getCommentCommandTraits());
191*67e74705SXin Li 
192*67e74705SXin Li   const std::string Result = P.Parse();
193*67e74705SXin Li   const unsigned BriefTextLength = Result.size();
194*67e74705SXin Li   char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195*67e74705SXin Li   memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196*67e74705SXin Li   BriefText = BriefTextPtr;
197*67e74705SXin Li   BriefTextValid = true;
198*67e74705SXin Li 
199*67e74705SXin Li   return BriefTextPtr;
200*67e74705SXin Li }
201*67e74705SXin Li 
parse(const ASTContext & Context,const Preprocessor * PP,const Decl * D) const202*67e74705SXin Li comments::FullComment *RawComment::parse(const ASTContext &Context,
203*67e74705SXin Li                                          const Preprocessor *PP,
204*67e74705SXin Li                                          const Decl *D) const {
205*67e74705SXin Li   // Make sure that RawText is valid.
206*67e74705SXin Li   getRawText(Context.getSourceManager());
207*67e74705SXin Li 
208*67e74705SXin Li   comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
209*67e74705SXin Li                     Context.getCommentCommandTraits(),
210*67e74705SXin Li                     getSourceRange().getBegin(),
211*67e74705SXin Li                     RawText.begin(), RawText.end());
212*67e74705SXin Li   comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
213*67e74705SXin Li                    Context.getDiagnostics(),
214*67e74705SXin Li                    Context.getCommentCommandTraits(),
215*67e74705SXin Li                    PP);
216*67e74705SXin Li   S.setDecl(D);
217*67e74705SXin Li   comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
218*67e74705SXin Li                      Context.getDiagnostics(),
219*67e74705SXin Li                      Context.getCommentCommandTraits());
220*67e74705SXin Li 
221*67e74705SXin Li   return P.parseFullComment();
222*67e74705SXin Li }
223*67e74705SXin Li 
onlyWhitespaceBetween(SourceManager & SM,SourceLocation Loc1,SourceLocation Loc2,unsigned MaxNewlinesAllowed)224*67e74705SXin Li static bool onlyWhitespaceBetween(SourceManager &SM,
225*67e74705SXin Li                                   SourceLocation Loc1, SourceLocation Loc2,
226*67e74705SXin Li                                   unsigned MaxNewlinesAllowed) {
227*67e74705SXin Li   std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228*67e74705SXin Li   std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
229*67e74705SXin Li 
230*67e74705SXin Li   // Question does not make sense if locations are in different files.
231*67e74705SXin Li   if (Loc1Info.first != Loc2Info.first)
232*67e74705SXin Li     return false;
233*67e74705SXin Li 
234*67e74705SXin Li   bool Invalid = false;
235*67e74705SXin Li   const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
236*67e74705SXin Li   if (Invalid)
237*67e74705SXin Li     return false;
238*67e74705SXin Li 
239*67e74705SXin Li   unsigned NumNewlines = 0;
240*67e74705SXin Li   assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241*67e74705SXin Li   // Look for non-whitespace characters and remember any newlines seen.
242*67e74705SXin Li   for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
243*67e74705SXin Li     switch (Buffer[I]) {
244*67e74705SXin Li     default:
245*67e74705SXin Li       return false;
246*67e74705SXin Li     case ' ':
247*67e74705SXin Li     case '\t':
248*67e74705SXin Li     case '\f':
249*67e74705SXin Li     case '\v':
250*67e74705SXin Li       break;
251*67e74705SXin Li     case '\r':
252*67e74705SXin Li     case '\n':
253*67e74705SXin Li       ++NumNewlines;
254*67e74705SXin Li 
255*67e74705SXin Li       // Check if we have found more than the maximum allowed number of
256*67e74705SXin Li       // newlines.
257*67e74705SXin Li       if (NumNewlines > MaxNewlinesAllowed)
258*67e74705SXin Li         return false;
259*67e74705SXin Li 
260*67e74705SXin Li       // Collapse \r\n and \n\r into a single newline.
261*67e74705SXin Li       if (I + 1 != Loc2Info.second &&
262*67e74705SXin Li           (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
263*67e74705SXin Li           Buffer[I] != Buffer[I + 1])
264*67e74705SXin Li         ++I;
265*67e74705SXin Li       break;
266*67e74705SXin Li     }
267*67e74705SXin Li   }
268*67e74705SXin Li 
269*67e74705SXin Li   return true;
270*67e74705SXin Li }
271*67e74705SXin Li 
addComment(const RawComment & RC,llvm::BumpPtrAllocator & Allocator)272*67e74705SXin Li void RawCommentList::addComment(const RawComment &RC,
273*67e74705SXin Li                                 llvm::BumpPtrAllocator &Allocator) {
274*67e74705SXin Li   if (RC.isInvalid())
275*67e74705SXin Li     return;
276*67e74705SXin Li 
277*67e74705SXin Li   // Check if the comments are not in source order.
278*67e74705SXin Li   while (!Comments.empty() &&
279*67e74705SXin Li          !SourceMgr.isBeforeInTranslationUnit(Comments.back()->getLocStart(),
280*67e74705SXin Li                                               RC.getLocStart())) {
281*67e74705SXin Li     // If they are, just pop a few last comments that don't fit.
282*67e74705SXin Li     // This happens if an \#include directive contains comments.
283*67e74705SXin Li     Comments.pop_back();
284*67e74705SXin Li   }
285*67e74705SXin Li 
286*67e74705SXin Li   // Ordinary comments are not interesting for us.
287*67e74705SXin Li   if (RC.isOrdinary())
288*67e74705SXin Li     return;
289*67e74705SXin Li 
290*67e74705SXin Li   // If this is the first Doxygen comment, save it (because there isn't
291*67e74705SXin Li   // anything to merge it with).
292*67e74705SXin Li   if (Comments.empty()) {
293*67e74705SXin Li     Comments.push_back(new (Allocator) RawComment(RC));
294*67e74705SXin Li     return;
295*67e74705SXin Li   }
296*67e74705SXin Li 
297*67e74705SXin Li   const RawComment &C1 = *Comments.back();
298*67e74705SXin Li   const RawComment &C2 = RC;
299*67e74705SXin Li 
300*67e74705SXin Li   // Merge comments only if there is only whitespace between them.
301*67e74705SXin Li   // Can't merge trailing and non-trailing comments unless the second is
302*67e74705SXin Li   // non-trailing ordinary in the same column, as in the case:
303*67e74705SXin Li   //   int x; // documents x
304*67e74705SXin Li   //          // more text
305*67e74705SXin Li   // versus:
306*67e74705SXin Li   //   int x; // documents x
307*67e74705SXin Li   //   int y; // documents y
308*67e74705SXin Li   // or:
309*67e74705SXin Li   //   int x; // documents x
310*67e74705SXin Li   //   // documents y
311*67e74705SXin Li   //   int y;
312*67e74705SXin Li   // Merge comments if they are on same or consecutive lines.
313*67e74705SXin Li   if ((C1.isTrailingComment() == C2.isTrailingComment() ||
314*67e74705SXin Li        (C1.isTrailingComment() && !C2.isTrailingComment() &&
315*67e74705SXin Li         isOrdinaryKind(C2.getKind()) &&
316*67e74705SXin Li         commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
317*67e74705SXin Li       onlyWhitespaceBetween(SourceMgr, C1.getLocEnd(), C2.getLocStart(),
318*67e74705SXin Li                             /*MaxNewlinesAllowed=*/1)) {
319*67e74705SXin Li     SourceRange MergedRange(C1.getLocStart(), C2.getLocEnd());
320*67e74705SXin Li     *Comments.back() = RawComment(SourceMgr, MergedRange, true,
321*67e74705SXin Li                                   RC.isParseAllComments());
322*67e74705SXin Li   } else {
323*67e74705SXin Li     Comments.push_back(new (Allocator) RawComment(RC));
324*67e74705SXin Li   }
325*67e74705SXin Li }
326*67e74705SXin Li 
addDeserializedComments(ArrayRef<RawComment * > DeserializedComments)327*67e74705SXin Li void RawCommentList::addDeserializedComments(ArrayRef<RawComment *> DeserializedComments) {
328*67e74705SXin Li   std::vector<RawComment *> MergedComments;
329*67e74705SXin Li   MergedComments.reserve(Comments.size() + DeserializedComments.size());
330*67e74705SXin Li 
331*67e74705SXin Li   std::merge(Comments.begin(), Comments.end(),
332*67e74705SXin Li              DeserializedComments.begin(), DeserializedComments.end(),
333*67e74705SXin Li              std::back_inserter(MergedComments),
334*67e74705SXin Li              BeforeThanCompare<RawComment>(SourceMgr));
335*67e74705SXin Li   std::swap(Comments, MergedComments);
336*67e74705SXin Li }
337*67e74705SXin Li 
338