clang-tools  8.0.0
FileDistance.cpp
Go to the documentation of this file.
1 //===--- FileDistance.cpp - File proximity scoring --------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // The FileDistance structure allows calculating the minimum distance to paths
11 // in a single tree.
12 // We simply walk up the path's ancestors until we find a node whose cost is
13 // known, and add the cost of walking back down. Initialization ensures this
14 // gives the correct path to the roots.
15 // We cache the results, so that the runtime is O(|A|), where A is the set of
16 // all distinct ancestors of visited paths.
17 //
18 // Example after initialization with /=2, /bar=0, DownCost = 1:
19 // / = 2
20 // /bar = 0
21 //
22 // After querying /foo/bar and /bar/foo:
23 // / = 2
24 // /bar = 0
25 // /bar/foo = 1
26 // /foo = 3
27 // /foo/bar = 4
28 //
29 // URIDistance creates FileDistance lazily for each URI scheme encountered. In
30 // practice this is a small constant factor.
31 //
32 //===----------------------------------------------------------------------===//
33 
34 #include "FileDistance.h"
35 #include "Logger.h"
36 #include "llvm/ADT/STLExtras.h"
37 #include <queue>
38 
39 namespace clang {
40 namespace clangd {
41 
42 // Convert a path into the canonical form.
43 // Canonical form is either "/", or "/segment" * N:
44 // C:\foo\bar --> /c:/foo/bar
45 // /foo/ --> /foo
46 // a/b/c --> /a/b/c
47 static llvm::SmallString<128> canonicalize(llvm::StringRef Path) {
48  llvm::SmallString<128> Result = Path.rtrim('/');
49  native(Result, llvm::sys::path::Style::posix);
50  if (Result.empty() || Result.front() != '/')
51  Result.insert(Result.begin(), '/');
52  return Result;
53 }
54 
55 constexpr const unsigned FileDistance::Unreachable;
56 const llvm::hash_code FileDistance::RootHash =
57  llvm::hash_value(llvm::StringRef("/"));
58 
59 FileDistance::FileDistance(llvm::StringMap<SourceParams> Sources,
60  const FileDistanceOptions &Opts)
61  : Opts(Opts) {
62  llvm::DenseMap<llvm::hash_code, llvm::SmallVector<llvm::hash_code, 4>>
63  DownEdges;
64  // Compute the best distance following only up edges.
65  // Keep track of down edges, in case we can use them to improve on this.
66  for (const auto &S : Sources) {
67  auto Canonical = canonicalize(S.getKey());
68  dlog("Source {0} = {1}, MaxUp = {2}", Canonical, S.second.Cost,
69  S.second.MaxUpTraversals);
70  // Walk up to ancestors of this source, assigning cost.
71  llvm::StringRef Rest = Canonical;
72  llvm::hash_code Hash = llvm::hash_value(Rest);
73  for (unsigned I = 0; !Rest.empty(); ++I) {
74  Rest = parent_path(Rest, llvm::sys::path::Style::posix);
75  auto NextHash = llvm::hash_value(Rest);
76  auto &Down = DownEdges[NextHash];
77  if (!llvm::is_contained(Down, Hash))
78  Down.push_back(Hash);
79  // We can't just break after MaxUpTraversals, must still set DownEdges.
80  if (I > S.getValue().MaxUpTraversals) {
81  if (Cache.find(Hash) != Cache.end())
82  break;
83  } else {
84  unsigned Cost = S.getValue().Cost + I * Opts.UpCost;
85  auto R = Cache.try_emplace(Hash, Cost);
86  if (!R.second) {
87  if (Cost < R.first->second) {
88  R.first->second = Cost;
89  } else {
90  // If we're not the best way to get to this path, stop assigning.
91  break;
92  }
93  }
94  }
95  Hash = NextHash;
96  }
97  }
98  // Now propagate scores parent -> child if that's an improvement.
99  // BFS ensures we propagate down chains (must visit parents before children).
100  std::queue<llvm::hash_code> Next;
101  for (auto Child : DownEdges.lookup(llvm::hash_value(llvm::StringRef(""))))
102  Next.push(Child);
103  while (!Next.empty()) {
104  auto Parent = Next.front();
105  Next.pop();
106  auto ParentCost = Cache.lookup(Parent);
107  for (auto Child : DownEdges.lookup(Parent)) {
108  if (Parent != RootHash || Opts.AllowDownTraversalFromRoot) {
109  auto &ChildCost =
110  Cache.try_emplace(Child, Unreachable).first->getSecond();
111  if (ParentCost + Opts.DownCost < ChildCost)
112  ChildCost = ParentCost + Opts.DownCost;
113  }
114  Next.push(Child);
115  }
116  }
117 }
118 
119 unsigned FileDistance::distance(llvm::StringRef Path) {
120  auto Canonical = canonicalize(Path);
121  unsigned Cost = Unreachable;
122  llvm::SmallVector<llvm::hash_code, 16> Ancestors;
123  // Walk up ancestors until we find a path we know the distance for.
124  for (llvm::StringRef Rest = Canonical; !Rest.empty();
125  Rest = parent_path(Rest, llvm::sys::path::Style::posix)) {
126  auto Hash = llvm::hash_value(Rest);
127  if (Hash == RootHash && !Ancestors.empty() &&
129  Cost = Unreachable;
130  break;
131  }
132  auto It = Cache.find(Hash);
133  if (It != Cache.end()) {
134  Cost = It->second;
135  break;
136  }
137  Ancestors.push_back(Hash);
138  }
139  // Now we know the costs for (known node, queried node].
140  // Fill these in, walking down the directory tree.
141  for (llvm::hash_code Hash : llvm::reverse(Ancestors)) {
142  if (Cost != Unreachable)
143  Cost += Opts.DownCost;
144  Cache.try_emplace(Hash, Cost);
145  }
146  dlog("distance({0} = {1})", Path, Cost);
147  return Cost;
148 }
149 
150 unsigned URIDistance::distance(llvm::StringRef URI) {
151  auto R = Cache.try_emplace(llvm::hash_value(URI), FileDistance::Unreachable);
152  if (!R.second)
153  return R.first->getSecond();
154  if (auto U = clangd::URI::parse(URI)) {
155  dlog("distance({0} = {1})", URI, U->body());
156  R.first->second = forScheme(U->scheme()).distance(U->body());
157  } else {
158  log("URIDistance::distance() of unparseable {0}: {1}", URI, U.takeError());
159  }
160  return R.first->second;
161 }
162 
163 FileDistance &URIDistance::forScheme(llvm::StringRef Scheme) {
164  auto &Delegate = ByScheme[Scheme];
165  if (!Delegate) {
166  llvm::StringMap<SourceParams> SchemeSources;
167  for (const auto &Source : Sources) {
168  if (auto U = clangd::URI::create(Source.getKey(), Scheme))
169  SchemeSources.try_emplace(U->body(), Source.getValue());
170  else
171  llvm::consumeError(U.takeError());
172  }
173  dlog("FileDistance for scheme {0}: {1}/{2} sources", Scheme,
174  SchemeSources.size(), Sources.size());
175  Delegate.reset(new FileDistance(std::move(SchemeSources), Opts));
176  }
177  return *Delegate;
178 }
179 
180 static std::pair<std::string, int> scopeToPath(llvm::StringRef Scope) {
181  llvm::SmallVector<llvm::StringRef, 4> Split;
182  Scope.split(Split, "::", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
183  return {"/" + llvm::join(Split, "/"), Split.size()};
184 }
185 
186 static FileDistance
187 createScopeFileDistance(llvm::ArrayRef<std::string> QueryScopes) {
188  FileDistanceOptions Opts;
189  Opts.UpCost = 2;
190  Opts.DownCost = 4;
191  Opts.AllowDownTraversalFromRoot = false;
192 
193  llvm::StringMap<SourceParams> Sources;
194  llvm::StringRef Preferred =
195  QueryScopes.empty() ? "" : QueryScopes.front().c_str();
196  for (llvm::StringRef S : QueryScopes) {
197  SourceParams Param;
198  // Penalize the global scope even it's preferred, as all projects can define
199  // symbols in it, and there is pattern where using-namespace is used in
200  // place of enclosing namespaces (e.g. in implementation files).
201  if (S == Preferred)
202  Param.Cost = S == "" ? 4 : 0;
203  else if (Preferred.startswith(S) && !S.empty())
204  continue; // just rely on up-traversals.
205  else
206  Param.Cost = S == "" ? 6 : 2;
207  auto Path = scopeToPath(S);
208  // The global namespace is not 'near' its children.
209  Param.MaxUpTraversals = std::max(Path.second - 1, 0);
210  Sources[Path.first] = std::move(Param);
211  }
212  return FileDistance(Sources, Opts);
213 }
214 
215 ScopeDistance::ScopeDistance(llvm::ArrayRef<std::string> QueryScopes)
216  : Distance(createScopeFileDistance(QueryScopes)) {}
217 
218 unsigned ScopeDistance::distance(llvm::StringRef SymbolScope) {
219  return Distance.distance(scopeToPath(SymbolScope).first);
220 }
221 
222 } // namespace clangd
223 } // namespace clang
ScopeDistance(llvm::ArrayRef< std::string > QueryScopes)
QueryScopes[0] is the preferred scope.
unsigned distance(llvm::StringRef SymbolScope)
static std::pair< std::string, int > scopeToPath(llvm::StringRef Scope)
static constexpr unsigned Unreachable
Definition: FileDistance.h:74
void log(const char *Fmt, Ts &&... Vals)
Definition: Logger.h:63
std::string Path
A typedef to represent a file path.
Definition: Path.h:21
#define dlog(...)
Definition: Logger.h:73
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
static llvm::SmallString< 128 > canonicalize(llvm::StringRef Path)
FileDistance(llvm::StringMap< SourceParams > Sources, const FileDistanceOptions &Opts={})
unsigned distance(llvm::StringRef URI)
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:188
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
static FileDistance createScopeFileDistance(llvm::ArrayRef< std::string > QueryScopes)
A URI describes the location of a source file.
Definition: URI.h:29
llvm::hash_code hash_value(const SymbolID &ID)
Definition: SymbolID.cpp:51
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:166
static std::string join(ArrayRef< SpecialMemberFunctionsCheck::SpecialMemberFunctionKind > SMFS, llvm::StringRef AndOr)
unsigned distance(llvm::StringRef Path)
static const llvm::hash_code RootHash
Definition: FileDistance.h:75