clang  10.0.0git
SarifDiagnostics.cpp
Go to the documentation of this file.
1 //===--- SarifDiagnostics.cpp - Sarif Diagnostics for Paths -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines the SarifDiagnostics object.
10 //
11 //===----------------------------------------------------------------------===//
12 
14 #include "clang/Basic/Version.h"
15 #include "clang/Lex/Preprocessor.h"
18 #include "llvm/ADT/STLExtras.h"
19 #include "llvm/ADT/StringMap.h"
20 #include "llvm/Support/ConvertUTF.h"
21 #include "llvm/Support/JSON.h"
22 #include "llvm/Support/Path.h"
23 
24 using namespace llvm;
25 using namespace clang;
26 using namespace ento;
27 
28 namespace {
29 class SarifDiagnostics : public PathDiagnosticConsumer {
30  std::string OutputFile;
31  const LangOptions &LO;
32 
33 public:
34  SarifDiagnostics(AnalyzerOptions &, const std::string &Output,
35  const LangOptions &LO)
36  : OutputFile(Output), LO(LO) {}
37  ~SarifDiagnostics() override = default;
38 
39  void FlushDiagnosticsImpl(std::vector<const PathDiagnostic *> &Diags,
40  FilesMade *FM) override;
41 
42  StringRef getName() const override { return "SarifDiagnostics"; }
43  PathGenerationScheme getGenerationScheme() const override { return Minimal; }
44  bool supportsLogicalOpControlFlow() const override { return true; }
45  bool supportsCrossFileDiagnostics() const override { return true; }
46 };
47 } // end anonymous namespace
48 
// Appends a newly allocated SarifDiagnostics consumer to C. The consumer is
// built from AnalyzerOpts, Output and the language options of PP.
//
// NOTE(review): listing line 52 is absent from this copy (the numbering jumps
// from 51 to 53), so the end of the parameter list and the opening brace of
// the body are not shown. Restore that line from the upstream file before
// compiling; the remaining parameter is not visible here.
49 void ento::createSarifDiagnosticConsumer(
50  AnalyzerOptions &AnalyzerOpts, PathDiagnosticConsumers &C,
51  const std::string &Output, const Preprocessor &PP,
53  C.push_back(new SarifDiagnostics(AnalyzerOpts, Output, PP.getLangOpts()));
54 }
55 
56 static StringRef getFileName(const FileEntry &FE) {
57  StringRef Filename = FE.tryGetRealPathName();
58  if (Filename.empty())
59  Filename = FE.getName();
60  return Filename;
61 }
62 
63 static std::string percentEncodeURICharacter(char C) {
64  // RFC 3986 claims alpha, numeric, and this handful of
65  // characters are not reserved for the path component and
66  // should be written out directly. Otherwise, percent
67  // encode the character and write that out instead of the
68  // reserved character.
69  if (llvm::isAlnum(C) ||
70  StringRef::npos != StringRef("-._~:@!$&'()*+,;=").find(C))
71  return std::string(&C, 1);
72  return "%" + llvm::toHex(StringRef(&C, 1));
73 }
74 
75 static std::string fileNameToURI(StringRef Filename) {
76  llvm::SmallString<32> Ret = StringRef("file://");
77 
78  // Get the root name to see if it has a URI authority.
79  StringRef Root = sys::path::root_name(Filename);
80  if (Root.startswith("//")) {
81  // There is an authority, so add it to the URI.
82  Ret += Root.drop_front(2).str();
83  } else if (!Root.empty()) {
84  // There is no authority, so end the component and add the root to the URI.
85  Ret += Twine("/" + Root).str();
86  }
87 
88  auto Iter = sys::path::begin(Filename), End = sys::path::end(Filename);
89  assert(Iter != End && "Expected there to be a non-root path component.");
90  // Add the rest of the path components, encoding any reserved characters;
91  // we skip past the first path component, as it was handled it above.
92  std::for_each(++Iter, End, [&Ret](StringRef Component) {
93  // For reasons unknown to me, we may get a backslash with Windows native
94  // paths for the initial backslash following the drive component, which
95  // we need to ignore as a URI path part.
96  if (Component == "\\")
97  return;
98 
99  // Add the separator between the previous path part and the one being
100  // currently processed.
101  Ret += "/";
102 
103  // URI encode the part.
104  for (char C : Component) {
105  Ret += percentEncodeURICharacter(C);
106  }
107  });
108 
109  return Ret.str().str();
110 }
111 
112 static json::Object createArtifactLocation(const FileEntry &FE) {
113  return json::Object{{"uri", fileNameToURI(getFileName(FE))}};
114 }
115 
116 static json::Object createArtifact(const FileEntry &FE) {
117  return json::Object{{"location", createArtifactLocation(FE)},
118  {"roles", json::Array{"resultFile"}},
119  {"length", FE.getSize()},
120  {"mimeType", "text/plain"}};
121 }
122 
123 static json::Object createArtifactLocation(const FileEntry &FE,
124  json::Array &Artifacts) {
125  std::string FileURI = fileNameToURI(getFileName(FE));
126 
127  // See if the Artifacts array contains this URI already. If it does not,
128  // create a new artifact object to add to the array.
129  auto I = llvm::find_if(Artifacts, [&](const json::Value &File) {
130  if (const json::Object *Obj = File.getAsObject()) {
131  if (const json::Object *FileLoc = Obj->getObject("location")) {
132  Optional<StringRef> URI = FileLoc->getString("uri");
133  return URI && URI->equals(FileURI);
134  }
135  }
136  return false;
137  });
138 
139  // Calculate the index within the artifact array so it can be stored in
140  // the JSON object.
141  auto Index = static_cast<unsigned>(std::distance(Artifacts.begin(), I));
142  if (I == Artifacts.end())
143  Artifacts.push_back(createArtifact(FE));
144 
145  return json::Object{{"uri", FileURI}, {"index", Index}};
146 }
147 
148 static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc,
149  unsigned int TokenLen = 0) {
150  assert(!Loc.isInvalid() && "invalid Loc when adjusting column position");
151 
152  std::pair<FileID, unsigned> LocInfo = SM.getDecomposedExpansionLoc(Loc);
153  assert(LocInfo.second > SM.getExpansionColumnNumber(Loc) &&
154  "position in file is before column number?");
155 
156  bool InvalidBuffer = false;
157  const MemoryBuffer *Buf = SM.getBuffer(LocInfo.first, &InvalidBuffer);
158  assert(!InvalidBuffer && "got an invalid buffer for the location's file");
159  assert(Buf->getBufferSize() >= (LocInfo.second + TokenLen) &&
160  "token extends past end of buffer?");
161 
162  // Adjust the offset to be the start of the line, since we'll be counting
163  // Unicode characters from there until our column offset.
164  unsigned int Off = LocInfo.second - (SM.getExpansionColumnNumber(Loc) - 1);
165  unsigned int Ret = 1;
166  while (Off < (LocInfo.second + TokenLen)) {
167  Off += getNumBytesForUTF8(Buf->getBuffer()[Off]);
168  Ret++;
169  }
170 
171  return Ret;
172 }
173 
174 static json::Object createTextRegion(const LangOptions &LO, SourceRange R,
175  const SourceManager &SM) {
176  json::Object Region{
177  {"startLine", SM.getExpansionLineNumber(R.getBegin())},
178  {"startColumn", adjustColumnPos(SM, R.getBegin())},
179  };
180  if (R.getBegin() == R.getEnd()) {
181  Region["endColumn"] = adjustColumnPos(SM, R.getBegin());
182  } else {
183  Region["endLine"] = SM.getExpansionLineNumber(R.getEnd());
184  Region["endColumn"] = adjustColumnPos(
185  SM, R.getEnd(),
186  Lexer::MeasureTokenLength(R.getEnd(), SM, LO));
187  }
188  return Region;
189 }
190 
191 static json::Object createPhysicalLocation(const LangOptions &LO,
192  SourceRange R, const FileEntry &FE,
193  const SourceManager &SMgr,
194  json::Array &Artifacts) {
195  return json::Object{
196  {{"artifactLocation", createArtifactLocation(FE, Artifacts)},
197  {"region", createTextRegion(LO, R, SMgr)}}};
198 }
199 
201 
// Maps an Importance value to one of the strings "important", "essential" or
// "unimportant". Falling out of the switch is treated as unreachable.
//
// NOTE(review): listing lines 204, 206 and 208 are absent from this copy (the
// numbering skips them), so the case labels that select each return statement
// are not shown. The declaration of Importance is not visible in this listing
// either. Restore the labels from the upstream file before compiling.
202 static StringRef importanceToStr(Importance I) {
203  switch (I) {
205  return "important";
207  return "essential";
209  return "unimportant";
210  }
211  llvm_unreachable("Fully covered switch is not so fully covered");
212 }
213 
214 static json::Object createThreadFlowLocation(json::Object &&Location,
215  Importance I) {
216  return json::Object{{"location", std::move(Location)},
217  {"importance", importanceToStr(I)}};
218 }
219 
220 static json::Object createMessage(StringRef Text) {
221  return json::Object{{"text", Text.str()}};
222 }
223 
224 static json::Object createLocation(json::Object &&PhysicalLocation,
225  StringRef Message = "") {
226  json::Object Ret{{"physicalLocation", std::move(PhysicalLocation)}};
227  if (!Message.empty())
228  Ret.insert({"message", createMessage(Message)});
229  return Ret;
230 }
231 
// Derives an Importance value from the kind of Piece. For Event pieces the
// choice depends on whether the piece's tag string is "ConditionBRVisitor".
//
// NOTE(review): listing lines 236, 242, 244 and 246 are absent from this copy
// (the numbering skips them). As shown, one case label, the second operand of
// the conditional expression, the statement for the ControlFlow case and the
// return after the switch are all missing. Restore them from the upstream
// file before compiling; their content cannot be determined from this listing.
232 static Importance calculateImportance(const PathDiagnosticPiece &Piece) {
233  switch (Piece.getKind()) {
234  case PathDiagnosticPiece::Call:
235  case PathDiagnosticPiece::Macro:
237  case PathDiagnosticPiece::PopUp:
238  // FIXME: What should be reported here?
239  break;
240  case PathDiagnosticPiece::Event:
241  return Piece.getTagStr() == "ConditionBRVisitor" ? Importance::Important
243  case PathDiagnosticPiece::ControlFlow:
245  }
247 }
248 
// Builds a thread flow object whose "locations" array receives one thread flow
// location per element of Pieces. The source manager is taken from the first
// piece, so Pieces is accessed through front() without an emptiness check.
//
// NOTE(review): listing line 257 is absent from this copy (the numbering jumps
// from 256 to 258). The closing parentheses on the lines that follow imply
// that two nested calls are opened on the missing line, presumably
// createLocation(createPhysicalLocation( judging by the argument lists;
// confirm against the upstream file before compiling.
249 static json::Object createThreadFlow(const LangOptions &LO,
250  const PathPieces &Pieces,
251  json::Array &Artifacts) {
252  const SourceManager &SMgr = Pieces.front()->getLocation().getManager();
253  json::Array Locations;
254  for (const auto &Piece : Pieces) {
255  const PathDiagnosticLocation &P = Piece->getLocation();
256  Locations.push_back(createThreadFlowLocation(
258  LO, P.asRange(),
259  *P.asLocation().getExpansionLoc().getFileEntry(),
260  SMgr, Artifacts),
261  Piece->getString()),
262  calculateImportance(*Piece)));
263  }
264  return json::Object{{"locations", std::move(Locations)}};
265 }
266 
267 static json::Object createCodeFlow(const LangOptions &LO,
268  const PathPieces &Pieces,
269  json::Array &Artifacts) {
270  return json::Object{
271  {"threadFlows", json::Array{createThreadFlow(LO, Pieces, Artifacts)}}};
272 }
273 
// Builds the result object for Diag with the members "message" (the verbose
// description), "codeFlows" (built from the flattened path), "locations",
// "ruleIndex" (looked up in RuleMapping by checker name) and "ruleId" (the
// checker name). The lookup is only guarded by an assertion, and the source
// manager is taken from the first piece of the flattened path.
//
// NOTE(review): listing line 288 is absent from this copy (the numbering jumps
// from 287 to 289). The closing tokens on the lines that follow imply that the
// missing line opens an array plus two nested calls, presumably
// json::Array{createLocation(createPhysicalLocation( judging by the argument
// list; confirm against the upstream file before compiling.
274 static json::Object createResult(const LangOptions &LO,
275  const PathDiagnostic &Diag,
276  json::Array &Artifacts,
277  const StringMap<unsigned> &RuleMapping) {
278  const PathPieces &Path = Diag.path.flatten(false);
279  const SourceManager &SMgr = Path.front()->getLocation().getManager();
280 
281  auto Iter = RuleMapping.find(Diag.getCheckerName());
282  assert(Iter != RuleMapping.end() && "Rule ID is not in the array index map?");
283 
284  return json::Object{
285  {"message", createMessage(Diag.getVerboseDescription())},
286  {"codeFlows", json::Array{createCodeFlow(LO, Path, Artifacts)}},
287  {"locations",
289  LO, Diag.getLocation().asRange(),
290  *Diag.getLocation().asLocation().getExpansionLoc().getFileEntry(),
291  SMgr, Artifacts))}},
292  {"ruleIndex", Iter->getValue()},
293  {"ruleId", Diag.getCheckerName()}};
294 }
295 
296 static StringRef getRuleDescription(StringRef CheckName) {
297  return llvm::StringSwitch<StringRef>(CheckName)
298 #define GET_CHECKERS
299 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \
300  .Case(FULLNAME, HELPTEXT)
301 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
302 #undef CHECKER
303 #undef GET_CHECKERS
304  ;
305 }
306 
307 static StringRef getRuleHelpURIStr(StringRef CheckName) {
308  return llvm::StringSwitch<StringRef>(CheckName)
309 #define GET_CHECKERS
310 #define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN) \
311  .Case(FULLNAME, DOC_URI)
312 #include "clang/StaticAnalyzer/Checkers/Checkers.inc"
313 #undef CHECKER
314 #undef GET_CHECKERS
315  ;
316 }
317 
318 static json::Object createRule(const PathDiagnostic &Diag) {
319  StringRef CheckName = Diag.getCheckerName();
320  json::Object Ret{
321  {"fullDescription", createMessage(getRuleDescription(CheckName))},
322  {"name", CheckName},
323  {"id", CheckName}};
324 
325  std::string RuleURI = getRuleHelpURIStr(CheckName);
326  if (!RuleURI.empty())
327  Ret["helpUri"] = RuleURI;
328 
329  return Ret;
330 }
331 
332 static json::Array createRules(std::vector<const PathDiagnostic *> &Diags,
333  StringMap<unsigned> &RuleMapping) {
334  json::Array Rules;
335  llvm::StringSet<> Seen;
336 
337  llvm::for_each(Diags, [&](const PathDiagnostic *D) {
338  StringRef RuleID = D->getCheckerName();
339  std::pair<llvm::StringSet<>::iterator, bool> P = Seen.insert(RuleID);
340  if (P.second) {
341  RuleMapping[RuleID] = Rules.size(); // Maps RuleID to an Array Index.
342  Rules.push_back(createRule(*D));
343  }
344  });
345 
346  return Rules;
347 }
348 
349 static json::Object createTool(std::vector<const PathDiagnostic *> &Diags,
350  StringMap<unsigned> &RuleMapping) {
351  return json::Object{
352  {"driver", json::Object{{"name", "clang"},
353  {"fullName", "clang static analyzer"},
354  {"language", "en-US"},
355  {"version", getClangFullVersion()},
356  {"rules", createRules(Diags, RuleMapping)}}}};
357 }
358 
359 static json::Object createRun(const LangOptions &LO,
360  std::vector<const PathDiagnostic *> &Diags) {
361  json::Array Results, Artifacts;
362  StringMap<unsigned> RuleMapping;
363  json::Object Tool = createTool(Diags, RuleMapping);
364 
365  llvm::for_each(Diags, [&](const PathDiagnostic *D) {
366  Results.push_back(createResult(LO, *D, Artifacts, RuleMapping));
367  });
368 
369  return json::Object{{"tool", std::move(Tool)},
370  {"results", std::move(Results)},
371  {"artifacts", std::move(Artifacts)},
372  {"columnKind", "unicodeCodePoints"}};
373 }
374 
375 void SarifDiagnostics::FlushDiagnosticsImpl(
376  std::vector<const PathDiagnostic *> &Diags, FilesMade *) {
377  // We currently overwrite the file if it already exists. However, it may be
378  // useful to add a feature someday that allows the user to append a run to an
379  // existing SARIF file. One danger from that approach is that the size of the
380  // file can become large very quickly, so decoding into JSON to append a run
381  // may be an expensive operation.
382  std::error_code EC;
383  llvm::raw_fd_ostream OS(OutputFile, EC, llvm::sys::fs::OF_Text);
384  if (EC) {
385  llvm::errs() << "warning: could not create file: " << EC.message() << '\n';
386  return;
387  }
388  json::Object Sarif{
389  {"$schema",
390  "https://raw.githubusercontent.com/oasis-tcs/sarif-spec/master/Schemata/sarif-schema-2.1.0.json"},
391  {"version", "2.1.0"},
392  {"runs", json::Array{createRun(LO, Diags)}}};
393  OS << llvm::formatv("{0:2}\n", json::Value(std::move(Sarif)));
394 }
StringRef tryGetRealPathName() const
Definition: FileManager.h:103
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
static StringRef getFileName(const FileEntry &FE)
static json::Object createThreadFlow(const LangOptions &LO, const PathPieces &Pieces, json::Array &Artifacts)
std::string getClangFullVersion()
Retrieves a string representing the complete clang version, which includes the clang version number...
Definition: Version.cpp:117
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
Definition: Dominators.h:30
StringRef P
float __ovld __cnfn distance(float p0, float p1)
Returns the distance between p0 and p1.
static json::Object createThreadFlowLocation(json::Object &&Location, Importance I)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:53
const LangOptions & getLangOpts() const
Definition: Preprocessor.h:907
std::pair< FileID, unsigned > getDecomposedExpansionLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
static StringRef getRuleDescription(StringRef CheckName)
static json::Object createArtifactLocation(const FileEntry &FE)
static StringRef importanceToStr(Importance I)
static json::Object createRun(const LangOptions &LO, std::vector< const PathDiagnostic *> &Diags)
static Importance calculateImportance(const PathDiagnosticPiece &Piece)
StringRef Filename
Definition: Format.cpp:1825
SourceLocation End
Defines version macros and version-related utility functions for Clang.
Defines the clang::Preprocessor interface.
static std::string percentEncodeURICharacter(char C)
static json::Object createRule(const PathDiagnostic &Diag)
SourceLocation getEnd() const
static json::Object createTool(std::vector< const PathDiagnostic *> &Diags, StringMap< unsigned > &RuleMapping)
unsigned getExpansionLineNumber(SourceLocation Loc, bool *Invalid=nullptr) const
const SourceManager & SM
Definition: Format.cpp:1685
Encodes a location in the source.
static json::Object createLocation(json::Object &&PhysicalLocation, StringRef Message="")
StringRef getName() const
Definition: FileManager.h:102
std::vector< PathDiagnosticConsumer * > PathDiagnosticConsumers
static bool Ret(InterpState &S, CodePtr &PC, APValue &Result)
Definition: Interp.cpp:34
static json::Object createResult(const LangOptions &LO, const PathDiagnostic &Diag, json::Array &Artifacts, const StringMap< unsigned > &RuleMapping)
Cached information about one file (either on disk or in the virtual file system). ...
Definition: FileManager.h:78
static json::Object createPhysicalLocation(const LangOptions &LO, SourceRange R, const FileEntry &FE, const SourceManager &SMgr, json::Array &Artifacts)
static json::Object createArtifact(const FileEntry &FE)
unsigned getExpansionColumnNumber(SourceLocation Loc, bool *Invalid=nullptr) const
const llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
Dataflow Directional Tag Classes.
static std::string getName(const CallEvent &Call)
off_t getSize() const
Definition: FileManager.h:105
static json::Object createTextRegion(const LangOptions &LO, SourceRange R, const SourceManager &SM)
This class is used for tools that requires cross translation unit capability.
#define CHECKER(FULLNAME, CLASS, HELPTEXT, DOC_URI, IS_HIDDEN)
#define GET_CHECKERS
static json::Array createRules(std::vector< const PathDiagnostic *> &Diags, StringMap< unsigned > &RuleMapping)
Stores options for the analyzer from the command line.
static StringRef getRuleHelpURIStr(StringRef CheckName)
static std::string fileNameToURI(StringRef Filename)
static json::Object createCodeFlow(const LangOptions &LO, const PathPieces &Pieces, json::Array &Artifacts)
static unsigned int adjustColumnPos(const SourceManager &SM, SourceLocation Loc, unsigned int TokenLen=0)
StringRef Text
Definition: Format.cpp:1826
A trivial tuple used to represent a source range.
static json::Object createMessage(StringRef Text)
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
Definition: Preprocessor.h:128