clang  10.0.0git
DependencyScanningFilesystem.cpp
Go to the documentation of this file.
1 //===- DependencyScanningFilesystem.cpp - clang-scan-deps fs --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
11 #include "llvm/Support/MemoryBuffer.h"
12 #include "llvm/Support/Threading.h"
13 
14 using namespace clang;
15 using namespace tooling;
16 using namespace dependencies;
17 
19  StringRef Filename, llvm::vfs::FileSystem &FS, bool Minimize) {
20  // Load the file and its content from the file system.
21  llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>> MaybeFile =
22  FS.openFileForRead(Filename);
23  if (!MaybeFile)
24  return MaybeFile.getError();
25  llvm::ErrorOr<llvm::vfs::Status> Stat = (*MaybeFile)->status();
26  if (!Stat)
27  return Stat.getError();
28 
29  llvm::vfs::File &F = **MaybeFile;
30  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>> MaybeBuffer =
31  F.getBuffer(Stat->getName());
32  if (!MaybeBuffer)
33  return MaybeBuffer.getError();
34 
35  llvm::SmallString<1024> MinimizedFileContents;
36  // Minimize the file down to directives that might affect the dependencies.
37  const auto &Buffer = *MaybeBuffer;
39  if (!Minimize || minimizeSourceToDependencyDirectives(
40  Buffer->getBuffer(), MinimizedFileContents, Tokens)) {
41  // Use the original file unless requested otherwise, or
42  // if the minimization failed.
43  // FIXME: Propagate the diagnostic if desired by the client.
44  CachedFileSystemEntry Result;
45  Result.MaybeStat = std::move(*Stat);
46  Result.Contents.reserve(Buffer->getBufferSize() + 1);
47  Result.Contents.append(Buffer->getBufferStart(), Buffer->getBufferEnd());
48  // Implicitly null terminate the contents for Clang's lexer.
49  Result.Contents.push_back('\0');
50  Result.Contents.pop_back();
51  return Result;
52  }
53 
54  CachedFileSystemEntry Result;
55  size_t Size = MinimizedFileContents.size();
56  Result.MaybeStat = llvm::vfs::Status(Stat->getName(), Stat->getUniqueID(),
57  Stat->getLastModificationTime(),
58  Stat->getUser(), Stat->getGroup(), Size,
59  Stat->getType(), Stat->getPermissions());
60  // The contents produced by the minimizer must be null terminated.
61  assert(MinimizedFileContents.data()[MinimizedFileContents.size()] == '\0' &&
62  "not null terminated contents");
63  // Even though there's an implicit null terminator in the minimized contents,
64  // we want to temporarily make it explicit. This will ensure that the
65  // std::move will preserve it even if it needs to do a copy if the
66  // SmallString still has the small capacity.
67  MinimizedFileContents.push_back('\0');
68  Result.Contents = std::move(MinimizedFileContents);
69  // Now make the null terminator implicit again, so that Clang's lexer can find
70  // it right where the buffer ends.
71  Result.Contents.pop_back();
72 
73  // Compute the skipped PP ranges that speedup skipping over inactive
74  // preprocessor blocks.
76  SkippedRanges;
78  SkippedRanges);
80  for (const auto &Range : SkippedRanges) {
81  if (Range.Length < 16) {
82  // Ignore small ranges as non-profitable.
83  // FIXME: This is a heuristic, it's worth investigating the tradeoffs
84  // when it should be applied.
85  continue;
86  }
87  Mapping[Range.Offset] = Range.Length;
88  }
89  Result.PPSkippedRangeMapping = std::move(Mapping);
90 
91  return Result;
92 }
93 
96  assert(Stat.isDirectory() && "not a directory!");
97  auto Result = CachedFileSystemEntry();
98  Result.MaybeStat = std::move(Stat);
99  return Result;
100 }
101 
104  // This heuristic was chosen using empirical testing on a
105  // reasonably high core machine (iMacPro 18 cores / 36 threads). The cache
106  // sharding gives a performance edge by reducing the lock contention.
107  // FIXME: A better heuristic might also consider the OS to account for
108  // the different cost of lock contention on different OSes.
109  NumShards = std::max(2u, llvm::hardware_concurrency() / 4);
110  CacheShards = std::make_unique<CacheShard[]>(NumShards);
111 }
112 
113 /// Returns a cache entry for the corresponding key.
114 ///
115 /// A new cache entry is created if the key is not in the cache. This is a
116 /// thread safe call.
119  CacheShard &Shard = CacheShards[llvm::hash_value(Key) % NumShards];
120  std::unique_lock<std::mutex> LockGuard(Shard.CacheLock);
121  auto It = Shard.Cache.try_emplace(Key);
122  return It.first->getValue();
123 }
124 
125 /// Whitelist file extensions that should be minimized, treating no extension as
126 /// a source file that should be minimized.
127 ///
128 /// This is kinda hacky, it would be better if we knew what kind of file Clang
129 /// was expecting instead.
130 static bool shouldMinimize(StringRef Filename) {
131  StringRef Ext = llvm::sys::path::extension(Filename);
132  if (Ext.empty())
133  return true; // C++ standard library
134  return llvm::StringSwitch<bool>(Ext)
135  .CasesLower(".c", ".cc", ".cpp", ".c++", ".cxx", true)
136  .CasesLower(".h", ".hh", ".hpp", ".h++", ".hxx", true)
137  .CasesLower(".m", ".mm", true)
138  .CasesLower(".i", ".ii", ".mi", ".mmi", true)
139  .CasesLower(".def", ".inc", true)
140  .Default(false);
141 }
142 
143 
144 static bool shouldCacheStatFailures(StringRef Filename) {
145  StringRef Ext = llvm::sys::path::extension(Filename);
146  if (Ext.empty())
147  return false; // This may be the module cache directory.
148  return shouldMinimize(Filename); // Only cache stat failures on source files.
149 }
150 
151 llvm::ErrorOr<const CachedFileSystemEntry *>
152 DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
153  const StringRef Filename) {
154  if (const CachedFileSystemEntry *Entry = getCachedEntry(Filename)) {
155  return Entry;
156  }
157 
158  // FIXME: Handle PCM/PCH files.
159  // FIXME: Handle module map files.
160 
161  bool KeepOriginalSource = IgnoredFiles.count(Filename) ||
162  !shouldMinimize(Filename);
164  &SharedCacheEntry = SharedCache.get(Filename);
165  const CachedFileSystemEntry *Result;
166  {
167  std::unique_lock<std::mutex> LockGuard(SharedCacheEntry.ValueLock);
168  CachedFileSystemEntry &CacheEntry = SharedCacheEntry.Value;
169 
170  if (!CacheEntry.isValid()) {
171  llvm::vfs::FileSystem &FS = getUnderlyingFS();
172  auto MaybeStatus = FS.status(Filename);
173  if (!MaybeStatus) {
174  if (!shouldCacheStatFailures(Filename))
175  // HACK: We need to always restat non source files if the stat fails.
176  // This is because Clang first looks up the module cache and module
177  // files before building them, and then looks for them again. If we
178  // cache the stat failure, it won't see them the second time.
179  return MaybeStatus.getError();
180  else
181  CacheEntry = CachedFileSystemEntry(MaybeStatus.getError());
182  } else if (MaybeStatus->isDirectory())
184  std::move(*MaybeStatus));
185  else
187  Filename, FS, !KeepOriginalSource);
188  }
189 
190  Result = &CacheEntry;
191  }
192 
193  // Store the result in the local cache.
194  setCachedEntry(Filename, Result);
195  return Result;
196 }
197 
198 llvm::ErrorOr<llvm::vfs::Status>
200  SmallString<256> OwnedFilename;
201  StringRef Filename = Path.toStringRef(OwnedFilename);
202  const llvm::ErrorOr<const CachedFileSystemEntry *> Result =
203  getOrCreateFileSystemEntry(Filename);
204  if (!Result)
205  return Result.getError();
206  return (*Result)->getStatus();
207 }
208 
209 namespace {
210 
211 /// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
212 /// this subclass.
213 class MinimizedVFSFile final : public llvm::vfs::File {
214 public:
215  MinimizedVFSFile(std::unique_ptr<llvm::MemoryBuffer> Buffer,
216  llvm::vfs::Status Stat)
217  : Buffer(std::move(Buffer)), Stat(std::move(Stat)) {}
218 
219  llvm::ErrorOr<llvm::vfs::Status> status() override { return Stat; }
220 
221  const llvm::MemoryBuffer *getBufferPtr() const { return Buffer.get(); }
222 
223  llvm::ErrorOr<std::unique_ptr<llvm::MemoryBuffer>>
224  getBuffer(const Twine &Name, int64_t FileSize, bool RequiresNullTerminator,
225  bool IsVolatile) override {
226  return std::move(Buffer);
227  }
228 
229  std::error_code close() override { return {}; }
230 
231 private:
232  std::unique_ptr<llvm::MemoryBuffer> Buffer;
233  llvm::vfs::Status Stat;
234 };
235 
236 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
237 createFile(const CachedFileSystemEntry *Entry,
239  if (Entry->isDirectory())
240  return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
241  std::make_error_code(std::errc::is_a_directory));
242  llvm::ErrorOr<StringRef> Contents = Entry->getContents();
243  if (!Contents)
244  return Contents.getError();
245  auto Result = std::make_unique<MinimizedVFSFile>(
246  llvm::MemoryBuffer::getMemBuffer(*Contents, Entry->getName(),
247  /*RequiresNullTerminator=*/false),
248  *Entry->getStatus());
249  if (!Entry->getPPSkippedRangeMapping().empty() && PPSkipMappings)
250  (*PPSkipMappings)[Result->getBufferPtr()] =
251  &Entry->getPPSkippedRangeMapping();
252  return llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>(
253  std::unique_ptr<llvm::vfs::File>(std::move(Result)));
254 }
255 
256 } // end anonymous namespace
257 
258 llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
260  SmallString<256> OwnedFilename;
261  StringRef Filename = Path.toStringRef(OwnedFilename);
262 
263  const llvm::ErrorOr<const CachedFileSystemEntry *> Result =
264  getOrCreateFileSystemEntry(Filename);
265  if (!Result)
266  return Result.getError();
267  return createFile(Result.get(), PPSkipMappings);
268 }
bool minimizeSourceToDependencyDirectives(llvm::StringRef Input, llvm::SmallVectorImpl< char > &Output, llvm::SmallVectorImpl< minimize_source_to_dependency_directives::Token > &Tokens, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Minimize the input down to the preprocessor directives that might have an effect on the dependencies ...
llvm::ErrorOr< llvm::vfs::Status > status(const Twine &Path) override
std::error_code make_error_code(ParseError e)
Definition: Format.cpp:625
CachedFileSystemEntry()
Default constructor creates an entry with an invalid stat.
SharedFileSystemEntry & get(StringRef Key)
Returns a cache entry for the corresponding key.
__DEVICE__ int max(int __a, int __b)
StringRef Filename
Definition: Format.cpp:1825
A source range independent of the SourceManager.
Definition: Replacement.h:44
static bool shouldMinimize(StringRef Filename)
Whitelist file extensions that should be minimized, treating no extension as a source file that shoul...
static CachedFileSystemEntry createFileEntry(StringRef Filename, llvm::vfs::FileSystem &FS, bool Minimize=true)
Create an entry that represents an opened source file with minimized or original contents.
llvm::hash_code hash_value(const clang::SanitizerMask &Arg)
Definition: Sanitizers.cpp:51
static bool shouldCacheStatFailures(StringRef Filename)
static CachedFileSystemEntry createDirectoryEntry(llvm::vfs::Status &&Stat)
Create an entry that represents a directory on the filesystem.
const PreprocessorSkippedRangeMapping & getPPSkippedRangeMapping() const
Return the mapping between location -> distance that is used to speed up the block skipping in the pr...
llvm::ErrorOr< llvm::vfs::Status > getStatus() const
Dataflow Directional Tag Classes.
llvm::DenseMap< unsigned, unsigned > PreprocessorSkippedRangeMapping
A mapping from an offset into a buffer to the number of bytes that can be skipped by the preprocessor...
llvm::ErrorOr< std::unique_ptr< llvm::vfs::File > > openFileForRead(const Twine &Path) override
llvm::DenseMap< const llvm::MemoryBuffer *, const PreprocessorSkippedRangeMapping * > ExcludedPreprocessorDirectiveSkipMapping
The datastructure that holds the mapping between the active memory buffers and the individual skip ma...
This is the interface for minimizing header and source files to the minimum necessary preprocessor di...
An in-memory representation of a file system entity that is of interest to the dependency scanning fi...
bool computeSkippedRanges(ArrayRef< Token > Input, llvm::SmallVectorImpl< SkippedRange > &Range)
Computes the potential source ranges that can be skipped by the preprocessor when skipping a directiv...