clang  10.0.0git
RawCommentList.cpp
Go to the documentation of this file.
1 //===--- RawCommentList.cpp - Processing raw comments -----------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
10 #include "clang/AST/ASTContext.h"
11 #include "clang/AST/Comment.h"
14 #include "clang/AST/CommentLexer.h"
16 #include "clang/AST/CommentSema.h"
17 #include "clang/Basic/CharInfo.h"
18 #include "llvm/ADT/STLExtras.h"
19 
20 using namespace clang;
21 
22 namespace {
23 /// Get comment kind and bool describing if it is a trailing comment.
24 std::pair<RawComment::CommentKind, bool> getCommentKind(StringRef Comment,
25  bool ParseAllComments) {
26  const size_t MinCommentLength = ParseAllComments ? 2 : 3;
27  if ((Comment.size() < MinCommentLength) || Comment[0] != '/')
28  return std::make_pair(RawComment::RCK_Invalid, false);
29 
31  if (Comment[1] == '/') {
32  if (Comment.size() < 3)
33  return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
34 
35  if (Comment[2] == '/')
37  else if (Comment[2] == '!')
39  else
40  return std::make_pair(RawComment::RCK_OrdinaryBCPL, false);
41  } else {
42  assert(Comment.size() >= 4);
43 
44  // Comment lexer does not understand escapes in comment markers, so pretend
45  // that this is not a comment.
46  if (Comment[1] != '*' ||
47  Comment[Comment.size() - 2] != '*' ||
48  Comment[Comment.size() - 1] != '/')
49  return std::make_pair(RawComment::RCK_Invalid, false);
50 
51  if (Comment[2] == '*')
53  else if (Comment[2] == '!')
55  else
56  return std::make_pair(RawComment::RCK_OrdinaryC, false);
57  }
58  const bool TrailingComment = (Comment.size() > 3) && (Comment[3] == '<');
59  return std::make_pair(K, TrailingComment);
60 }
61 
62 bool mergedCommentIsTrailingComment(StringRef Comment) {
63  return (Comment.size() > 3) && (Comment[3] == '<');
64 }
65 
66 /// Returns true if R1 and R2 both have valid locations that start on the same
67 /// column.
68 bool commentsStartOnSameColumn(const SourceManager &SM, const RawComment &R1,
69  const RawComment &R2) {
70  SourceLocation L1 = R1.getBeginLoc();
71  SourceLocation L2 = R2.getBeginLoc();
72  bool Invalid = false;
73  unsigned C1 = SM.getPresumedColumnNumber(L1, &Invalid);
74  if (!Invalid) {
75  unsigned C2 = SM.getPresumedColumnNumber(L2, &Invalid);
76  return !Invalid && (C1 == C2);
77  }
78  return false;
79 }
80 } // unnamed namespace
81 
82 /// Determines whether there is only whitespace in `Buffer` between `P`
83 /// and the previous line.
84 /// \param Buffer The buffer to search in.
85 /// \param P The offset from the beginning of `Buffer` to start from.
86 /// \return true if all of the characters in `Buffer` ranging from the closest
87 /// line-ending character before `P` (or the beginning of `Buffer`) to `P - 1`
88 /// are whitespace.
89 static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P) {
90  // Search backwards until we see linefeed or carriage return.
91  for (unsigned I = P; I != 0; --I) {
92  char C = Buffer[I - 1];
93  if (isVerticalWhitespace(C))
94  return true;
95  if (!isHorizontalWhitespace(C))
96  return false;
97  }
98  // We hit the beginning of the buffer.
99  return true;
100 }
101 
102 /// Returns whether `K` is an ordinary comment kind.
104  return (K == RawComment::RCK_OrdinaryBCPL) ||
106 }
107 
109  const CommentOptions &CommentOpts, bool Merged) :
110  Range(SR), RawTextValid(false), BriefTextValid(false),
111  IsAttached(false), IsTrailingComment(false),
112  IsAlmostTrailingComment(false) {
113  // Extract raw comment text, if possible.
114  if (SR.getBegin() == SR.getEnd() || getRawText(SourceMgr).empty()) {
115  Kind = RCK_Invalid;
116  return;
117  }
118 
119  // Guess comment kind.
120  std::pair<CommentKind, bool> K =
121  getCommentKind(RawText, CommentOpts.ParseAllComments);
122 
123  // Guess whether an ordinary comment is trailing.
124  if (CommentOpts.ParseAllComments && isOrdinaryKind(K.first)) {
125  FileID BeginFileID;
126  unsigned BeginOffset;
127  std::tie(BeginFileID, BeginOffset) =
128  SourceMgr.getDecomposedLoc(Range.getBegin());
129  if (BeginOffset != 0) {
130  bool Invalid = false;
131  const char *Buffer =
132  SourceMgr.getBufferData(BeginFileID, &Invalid).data();
133  IsTrailingComment |=
134  (!Invalid && !onlyWhitespaceOnLineBefore(Buffer, BeginOffset));
135  }
136  }
137 
138  if (!Merged) {
139  Kind = K.first;
140  IsTrailingComment |= K.second;
141 
142  IsAlmostTrailingComment = RawText.startswith("//<") ||
143  RawText.startswith("/*<");
144  } else {
145  Kind = RCK_Merged;
146  IsTrailingComment =
147  IsTrailingComment || mergedCommentIsTrailingComment(RawText);
148  }
149 }
150 
151 StringRef RawComment::getRawTextSlow(const SourceManager &SourceMgr) const {
152  FileID BeginFileID;
153  FileID EndFileID;
154  unsigned BeginOffset;
155  unsigned EndOffset;
156 
157  std::tie(BeginFileID, BeginOffset) =
158  SourceMgr.getDecomposedLoc(Range.getBegin());
159  std::tie(EndFileID, EndOffset) = SourceMgr.getDecomposedLoc(Range.getEnd());
160 
161  const unsigned Length = EndOffset - BeginOffset;
162  if (Length < 2)
163  return StringRef();
164 
165  // The comment can't begin in one file and end in another.
166  assert(BeginFileID == EndFileID);
167 
168  bool Invalid = false;
169  const char *BufferStart = SourceMgr.getBufferData(BeginFileID,
170  &Invalid).data();
171  if (Invalid)
172  return StringRef();
173 
174  return StringRef(BufferStart + BeginOffset, Length);
175 }
176 
177 const char *RawComment::extractBriefText(const ASTContext &Context) const {
178  // Lazily initialize RawText using the accessor before using it.
179  (void)getRawText(Context.getSourceManager());
180 
181  // Since we will be copying the resulting text, all allocations made during
182  // parsing are garbage after resulting string is formed. Thus we can use
183  // a separate allocator for all temporary stuff.
184  llvm::BumpPtrAllocator Allocator;
185 
186  comments::Lexer L(Allocator, Context.getDiagnostics(),
187  Context.getCommentCommandTraits(),
188  Range.getBegin(),
189  RawText.begin(), RawText.end());
191 
192  const std::string Result = P.Parse();
193  const unsigned BriefTextLength = Result.size();
194  char *BriefTextPtr = new (Context) char[BriefTextLength + 1];
195  memcpy(BriefTextPtr, Result.c_str(), BriefTextLength + 1);
196  BriefText = BriefTextPtr;
197  BriefTextValid = true;
198 
199  return BriefTextPtr;
200 }
201 
203  const Preprocessor *PP,
204  const Decl *D) const {
205  // Lazily initialize RawText using the accessor before using it.
206  (void)getRawText(Context.getSourceManager());
207 
208  comments::Lexer L(Context.getAllocator(), Context.getDiagnostics(),
209  Context.getCommentCommandTraits(),
211  RawText.begin(), RawText.end());
212  comments::Sema S(Context.getAllocator(), Context.getSourceManager(),
213  Context.getDiagnostics(),
214  Context.getCommentCommandTraits(),
215  PP);
216  S.setDecl(D);
217  comments::Parser P(L, S, Context.getAllocator(), Context.getSourceManager(),
218  Context.getDiagnostics(),
219  Context.getCommentCommandTraits());
220 
221  return P.parseFullComment();
222 }
223 
225  SourceLocation Loc1, SourceLocation Loc2,
226  unsigned MaxNewlinesAllowed) {
227  std::pair<FileID, unsigned> Loc1Info = SM.getDecomposedLoc(Loc1);
228  std::pair<FileID, unsigned> Loc2Info = SM.getDecomposedLoc(Loc2);
229 
230  // Question does not make sense if locations are in different files.
231  if (Loc1Info.first != Loc2Info.first)
232  return false;
233 
234  bool Invalid = false;
235  const char *Buffer = SM.getBufferData(Loc1Info.first, &Invalid).data();
236  if (Invalid)
237  return false;
238 
239  unsigned NumNewlines = 0;
240  assert(Loc1Info.second <= Loc2Info.second && "Loc1 after Loc2!");
241  // Look for non-whitespace characters and remember any newlines seen.
242  for (unsigned I = Loc1Info.second; I != Loc2Info.second; ++I) {
243  switch (Buffer[I]) {
244  default:
245  return false;
246  case ' ':
247  case '\t':
248  case '\f':
249  case '\v':
250  break;
251  case '\r':
252  case '\n':
253  ++NumNewlines;
254 
255  // Check if we have found more than the maximum allowed number of
256  // newlines.
257  if (NumNewlines > MaxNewlinesAllowed)
258  return false;
259 
260  // Collapse \r\n and \n\r into a single newline.
261  if (I + 1 != Loc2Info.second &&
262  (Buffer[I + 1] == '\n' || Buffer[I + 1] == '\r') &&
263  Buffer[I] != Buffer[I + 1])
264  ++I;
265  break;
266  }
267  }
268 
269  return true;
270 }
271 
273  const CommentOptions &CommentOpts,
274  llvm::BumpPtrAllocator &Allocator) {
275  if (RC.isInvalid())
276  return;
277 
278  // Ordinary comments are not interesting for us.
279  if (RC.isOrdinary() && !CommentOpts.ParseAllComments)
280  return;
281 
282  std::pair<FileID, unsigned> Loc =
283  SourceMgr.getDecomposedLoc(RC.getBeginLoc());
284 
285  const FileID CommentFile = Loc.first;
286  const unsigned CommentOffset = Loc.second;
287 
288  // If this is the first Doxygen comment, save it (because there isn't
289  // anything to merge it with).
290  if (OrderedComments[CommentFile].empty()) {
291  OrderedComments[CommentFile][CommentOffset] =
292  new (Allocator) RawComment(RC);
293  return;
294  }
295 
296  const RawComment &C1 = *OrderedComments[CommentFile].rbegin()->second;
297  const RawComment &C2 = RC;
298 
299  // Merge comments only if there is only whitespace between them.
300  // Can't merge trailing and non-trailing comments unless the second is
301  // non-trailing ordinary in the same column, as in the case:
302  // int x; // documents x
303  // // more text
304  // versus:
305  // int x; // documents x
306  // int y; // documents y
307  // or:
308  // int x; // documents x
309  // // documents y
310  // int y;
311  // Merge comments if they are on same or consecutive lines.
312  if ((C1.isTrailingComment() == C2.isTrailingComment() ||
313  (C1.isTrailingComment() && !C2.isTrailingComment() &&
314  isOrdinaryKind(C2.getKind()) &&
315  commentsStartOnSameColumn(SourceMgr, C1, C2))) &&
316  onlyWhitespaceBetween(SourceMgr, C1.getEndLoc(), C2.getBeginLoc(),
317  /*MaxNewlinesAllowed=*/1)) {
318  SourceRange MergedRange(C1.getBeginLoc(), C2.getEndLoc());
319  *OrderedComments[CommentFile].rbegin()->second =
320  RawComment(SourceMgr, MergedRange, CommentOpts, true);
321  } else {
322  OrderedComments[CommentFile][CommentOffset] =
323  new (Allocator) RawComment(RC);
324  }
325 }
326 
327 const std::map<unsigned, RawComment *> *
329  auto CommentsInFile = OrderedComments.find(File);
330  if (CommentsInFile == OrderedComments.end())
331  return nullptr;
332 
333  return &CommentsInFile->second;
334 }
335 
336 bool RawCommentList::empty() const { return OrderedComments.empty(); }
337 
339  unsigned Offset) const {
340  auto Cached = CommentBeginLine.find(C);
341  if (Cached != CommentBeginLine.end())
342  return Cached->second;
343  const unsigned Line = SourceMgr.getLineNumber(File, Offset);
344  CommentBeginLine[C] = Line;
345  return Line;
346 }
347 
349  auto Cached = CommentEndOffset.find(C);
350  if (Cached != CommentEndOffset.end())
351  return Cached->second;
352  const unsigned Offset =
353  SourceMgr.getDecomposedLoc(C->getSourceRange().getEnd()).second;
354  CommentEndOffset[C] = Offset;
355  return Offset;
356 }
357 
358 std::string RawComment::getFormattedText(const SourceManager &SourceMgr,
359  DiagnosticsEngine &Diags) const {
360  llvm::StringRef CommentText = getRawText(SourceMgr);
361  if (CommentText.empty())
362  return "";
363 
364  llvm::BumpPtrAllocator Allocator;
365  // We do not parse any commands, so CommentOptions are ignored by
366  // comments::Lexer. Therefore, we just use default-constructed options.
367  CommentOptions DefOpts;
368  comments::CommandTraits EmptyTraits(Allocator, DefOpts);
369  comments::Lexer L(Allocator, Diags, EmptyTraits, getSourceRange().getBegin(),
370  CommentText.begin(), CommentText.end(),
371  /*ParseCommands=*/false);
372 
373  std::string Result;
374  // A column number of the first non-whitespace token in the comment text.
375  // We skip whitespace up to this column, but keep the whitespace after this
376  // column. IndentColumn is calculated when lexing the first line and reused
377  // for the rest of lines.
378  unsigned IndentColumn = 0;
379 
380  // Processes one line of the comment and adds it to the result.
381  // Handles skipping the indent at the start of the line.
382  // Returns false when eof is reached and true otherwise.
383  auto LexLine = [&](bool IsFirstLine) -> bool {
385  // Lex the first token on the line. We handle it separately, because we to
386  // fix up its indentation.
387  L.lex(Tok);
388  if (Tok.is(comments::tok::eof))
389  return false;
390  if (Tok.is(comments::tok::newline)) {
391  Result += "\n";
392  return true;
393  }
394  llvm::StringRef TokText = L.getSpelling(Tok, SourceMgr);
395  bool LocInvalid = false;
396  unsigned TokColumn =
397  SourceMgr.getSpellingColumnNumber(Tok.getLocation(), &LocInvalid);
398  assert(!LocInvalid && "getFormattedText for invalid location");
399 
400  // Amount of leading whitespace in TokText.
401  size_t WhitespaceLen = TokText.find_first_not_of(" \t");
402  if (WhitespaceLen == StringRef::npos)
403  WhitespaceLen = TokText.size();
404  // Remember the amount of whitespace we skipped in the first line to remove
405  // indent up to that column in the following lines.
406  if (IsFirstLine)
407  IndentColumn = TokColumn + WhitespaceLen;
408 
409  // Amount of leading whitespace we actually want to skip.
410  // For the first line we skip all the whitespace.
411  // For the rest of the lines, we skip whitespace up to IndentColumn.
412  unsigned SkipLen =
413  IsFirstLine
414  ? WhitespaceLen
415  : std::min<size_t>(
416  WhitespaceLen,
417  std::max<int>(static_cast<int>(IndentColumn) - TokColumn, 0));
418  llvm::StringRef Trimmed = TokText.drop_front(SkipLen);
419  Result += Trimmed;
420  // Lex all tokens in the rest of the line.
421  for (L.lex(Tok); Tok.isNot(comments::tok::eof); L.lex(Tok)) {
422  if (Tok.is(comments::tok::newline)) {
423  Result += "\n";
424  return true;
425  }
426  Result += L.getSpelling(Tok, SourceMgr);
427  }
428  // We've reached the end of file token.
429  return false;
430  };
431 
432  auto DropTrailingNewLines = [](std::string &Str) {
433  while (Str.back() == '\n')
434  Str.pop_back();
435  };
436 
437  // Process first line separately to remember indent for the following lines.
438  if (!LexLine(/*IsFirstLine=*/true)) {
439  DropTrailingNewLines(Result);
440  return Result;
441  }
442  // Process the rest of the lines.
443  while (LexLine(/*IsFirstLine=*/false))
444  ;
445  DropTrailingNewLines(Result);
446  return Result;
447 }
Defines the clang::ASTContext interface.
bool isInvalid() const LLVM_READONLY
Decl - This represents one declaration (or definition), e.g.
Definition: DeclBase.h:88
unsigned getPresumedColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
StringRef P
DiagnosticsEngine & getDiagnostics() const
Any normal BCPL comments.
Options for controlling comment parsing.
SourceRange getSourceRange() const LLVM_READONLY
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: &#39; &#39;, &#39;\t&#39;, &#39;\f&#39;, &#39;\v&#39;.
Definition: CharInfo.h:70
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:168
unsigned getCommentEndOffset(RawComment *C) const
bool isNot(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:93
const FormatToken & Tok
Concrete class used by the front-end to report problems and issues.
Definition: Diagnostic.h:149
void addComment(const RawComment &RC, const CommentOptions &CommentOpts, llvm::BumpPtrAllocator &Allocator)
SourceLocation getEndLoc() const LLVM_READONLY
unsigned Offset
Definition: Format.cpp:1827
static bool onlyWhitespaceBetween(SourceManager &SM, SourceLocation Loc1, SourceLocation Loc2, unsigned MaxNewlinesAllowed)
Any normal C comment.
const AnnotatedLine * Line
comments::CommandTraits & getCommentCommandTraits() const
Definition: ASTContext.h:880
SourceLocation getBeginLoc() const LLVM_READONLY
SourceLocation getEnd() const
SourceLocation getLocation() const LLVM_READONLY
Definition: CommentLexer.h:80
The result type of a method or function.
const SourceManager & SM
Definition: Format.cpp:1685
comments::FullComment * parse(const ASTContext &Context, const Preprocessor *PP, const Decl *D) const
Parse the comment, assuming it is attached to decl D.
bool isOrdinary() const LLVM_READONLY
Returns true if this comment is not a documentation comment.
bool is(tok::TokenKind K) const LLVM_READONLY
Definition: CommentLexer.h:92
This class provides information about commands that can be used in comments.
unsigned getCommentBeginLine(RawComment *C, FileID File, unsigned Offset) const
#define false
Definition: stdbool.h:17
Kind
Two or more documentation comments merged together.
Encodes a location in the source.
static bool isOrdinaryKind(RawComment::CommentKind K)
Returns whether K is an ordinary comment kind.
const std::map< unsigned, RawComment * > * getCommentsInFile(FileID File) const
Comment lexer.
Definition: CommentLexer.h:220
unsigned getLineNumber(FileID FID, unsigned FilePos, bool *Invalid=nullptr) const
Given a SourceLocation, return the spelling line number for the position indicated.
static bool onlyWhitespaceOnLineBefore(const char *Buffer, unsigned P)
Determines whether there is only whitespace in Buffer between P and the previous line.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
unsigned getSpellingColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Dataflow Directional Tag Classes.
bool isTrailingComment() const LLVM_READONLY
Returns true if it is a comment that should be put after a member:
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: &#39;\n&#39;, &#39;\r&#39;.
Definition: CharInfo.h:78
llvm::BumpPtrAllocator & getAllocator() const
Definition: ASTContext.h:682
Comment token.
Definition: CommentLexer.h:55
SourceManager & getSourceManager()
Definition: ASTContext.h:679
Doxygen comment parser.
Definition: CommentParser.h:29
StringRef getRawText(const SourceManager &SourceMgr) const
Returns raw comment text with comment markers.
CommentKind getKind() const LLVM_READONLY
A very simple comment parser that extracts "a brief description".
A trivial tuple used to represent a source range.
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
A full comment attached to a declaration, contains block content.
Definition: Comment.h:1093
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128
bool ParseAllComments
Treat ordinary comments as documentation comments.