clang-tools  8.0.0
Quality.cpp
Go to the documentation of this file.
1 //===--- Quality.cpp ---------------------------------------------*- C++-*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 #include "Quality.h"
10 #include "AST.h"
11 #include "FileDistance.h"
12 #include "URI.h"
13 #include "index/Index.h"
14 #include "clang/AST/ASTContext.h"
15 #include "clang/AST/Decl.h"
16 #include "clang/AST/DeclCXX.h"
17 #include "clang/AST/DeclTemplate.h"
18 #include "clang/AST/DeclVisitor.h"
19 #include "clang/Basic/CharInfo.h"
20 #include "clang/Basic/SourceManager.h"
21 #include "clang/Sema/CodeCompleteConsumer.h"
22 #include "llvm/ADT/ArrayRef.h"
23 #include "llvm/ADT/SmallString.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringExtras.h"
26 #include "llvm/ADT/StringRef.h"
27 #include "llvm/Support/Casting.h"
28 #include "llvm/Support/FormatVariadic.h"
29 #include "llvm/Support/MathExtras.h"
30 #include "llvm/Support/raw_ostream.h"
31 #include <algorithm>
32 #include <cmath>
33 
34 namespace clang {
35 namespace clangd {
36 static bool isReserved(llvm::StringRef Name) {
37  // FIXME: Should we exclude _Bool and others recognized by the standard?
38  return Name.size() >= 2 && Name[0] == '_' &&
39  (isUppercase(Name[1]) || Name[1] == '_');
40 }
41 
42 static bool hasDeclInMainFile(const Decl &D) {
43  auto &SourceMgr = D.getASTContext().getSourceManager();
44  for (auto *Redecl : D.redecls()) {
45  auto Loc = SourceMgr.getSpellingLoc(Redecl->getLocation());
46  if (SourceMgr.isWrittenInMainFile(Loc))
47  return true;
48  }
49  return false;
50 }
51 
52 static bool hasUsingDeclInMainFile(const CodeCompletionResult &R) {
53  const auto &Context = R.Declaration->getASTContext();
54  const auto &SourceMgr = Context.getSourceManager();
55  if (R.ShadowDecl) {
56  const auto Loc = SourceMgr.getExpansionLoc(R.ShadowDecl->getLocation());
57  if (SourceMgr.isWrittenInMainFile(Loc))
58  return true;
59  }
60  return false;
61 }
62 
63 static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND) {
64  if (const auto *FD = dyn_cast<FunctionDecl>(&ND)) {
65  if (FD->isOverloadedOperator())
67  }
68  class Switch
69  : public ConstDeclVisitor<Switch, SymbolQualitySignals::SymbolCategory> {
70  public:
71 #define MAP(DeclType, Category) \
72  SymbolQualitySignals::SymbolCategory Visit##DeclType(const DeclType *) { \
73  return SymbolQualitySignals::Category; \
74  }
75  MAP(NamespaceDecl, Namespace);
76  MAP(NamespaceAliasDecl, Namespace);
77  MAP(TypeDecl, Type);
78  MAP(TypeAliasTemplateDecl, Type);
79  MAP(ClassTemplateDecl, Type);
80  MAP(CXXConstructorDecl, Constructor);
81  MAP(CXXDestructorDecl, Destructor);
82  MAP(ValueDecl, Variable);
83  MAP(VarTemplateDecl, Variable);
84  MAP(FunctionDecl, Function);
85  MAP(FunctionTemplateDecl, Function);
86  MAP(Decl, Unknown);
87 #undef MAP
88  };
89  return Switch().Visit(&ND);
90 }
91 
93 categorize(const CodeCompletionResult &R) {
94  if (R.Declaration)
95  return categorize(*R.Declaration);
96  if (R.Kind == CodeCompletionResult::RK_Macro)
98  // Everything else is a keyword or a pattern. Patterns are mostly keywords
99  // too, except a few which we recognize by cursor kind.
100  switch (R.CursorKind) {
101  case CXCursor_CXXMethod:
103  case CXCursor_ModuleImportDecl:
105  case CXCursor_MacroDefinition:
107  case CXCursor_TypeRef:
109  case CXCursor_MemberRef:
111  case CXCursor_Constructor:
113  default:
115  }
116 }
117 
120  switch (D.Kind) {
121  case index::SymbolKind::Namespace:
122  case index::SymbolKind::NamespaceAlias:
124  case index::SymbolKind::Macro:
126  case index::SymbolKind::Enum:
127  case index::SymbolKind::Struct:
128  case index::SymbolKind::Class:
129  case index::SymbolKind::Protocol:
130  case index::SymbolKind::Extension:
131  case index::SymbolKind::Union:
132  case index::SymbolKind::TypeAlias:
134  case index::SymbolKind::Function:
135  case index::SymbolKind::ClassMethod:
136  case index::SymbolKind::InstanceMethod:
137  case index::SymbolKind::StaticMethod:
138  case index::SymbolKind::InstanceProperty:
139  case index::SymbolKind::ClassProperty:
140  case index::SymbolKind::StaticProperty:
141  case index::SymbolKind::ConversionFunction:
143  case index::SymbolKind::Destructor:
145  case index::SymbolKind::Constructor:
147  case index::SymbolKind::Variable:
148  case index::SymbolKind::Field:
149  case index::SymbolKind::EnumConstant:
150  case index::SymbolKind::Parameter:
152  case index::SymbolKind::Using:
153  case index::SymbolKind::Module:
156  }
157  llvm_unreachable("Unknown index::SymbolKind");
158 }
159 
160 static bool isInstanceMember(const NamedDecl *ND) {
161  if (!ND)
162  return false;
163  if (const auto *TP = dyn_cast<FunctionTemplateDecl>(ND))
164  ND = TP->TemplateDecl::getTemplatedDecl();
165  if (const auto *CM = dyn_cast<CXXMethodDecl>(ND))
166  return !CM->isStatic();
167  return isa<FieldDecl>(ND); // Note that static fields are VarDecl.
168 }
169 
170 static bool isInstanceMember(const index::SymbolInfo &D) {
171  switch (D.Kind) {
172  case index::SymbolKind::InstanceMethod:
173  case index::SymbolKind::InstanceProperty:
174  case index::SymbolKind::Field:
175  return true;
176  default:
177  return false;
178  }
179 }
180 
181 void SymbolQualitySignals::merge(const CodeCompletionResult &SemaCCResult) {
182  Deprecated |= (SemaCCResult.Availability == CXAvailability_Deprecated);
183  Category = categorize(SemaCCResult);
184 
185  if (SemaCCResult.Declaration) {
186  ImplementationDetail |= isImplementationDetail(SemaCCResult.Declaration);
187  if (auto *ID = SemaCCResult.Declaration->getIdentifier())
188  ReservedName = ReservedName || isReserved(ID->getName());
189  } else if (SemaCCResult.Kind == CodeCompletionResult::RK_Macro)
190  ReservedName = ReservedName || isReserved(SemaCCResult.Macro->getName());
191 }
192 
194  Deprecated |= (IndexResult.Flags & Symbol::Deprecated);
195  ImplementationDetail |= (IndexResult.Flags & Symbol::ImplementationDetail);
196  References = std::max(IndexResult.References, References);
197  Category = categorize(IndexResult.SymInfo);
198  ReservedName = ReservedName || isReserved(IndexResult.Name);
199 }
200 
202  float Score = 1;
203 
204  // This avoids a sharp gradient for tail symbols, and also neatly avoids the
205  // question of whether 0 references means a bad symbol or missing data.
206  if (References >= 10) {
207  // Use a sigmoid style boosting function, which flats out nicely for large
208  // numbers (e.g. 2.58 for 1M refererences).
209  // The following boosting function is equivalent to:
210  // m = 0.06
211  // f = 12.0
212  // boost = f * sigmoid(m * std::log(References)) - 0.5 * f + 0.59
213  // Sample data points: (10, 1.00), (100, 1.41), (1000, 1.82),
214  // (10K, 2.21), (100K, 2.58), (1M, 2.94)
215  float S = std::pow(References, -0.06);
216  Score *= 6.0 * (1 - S) / (1 + S) + 0.59;
217  }
218 
219  if (Deprecated)
220  Score *= 0.1f;
221  if (ReservedName)
222  Score *= 0.1f;
223  if (ImplementationDetail)
224  Score *= 0.2f;
225 
226  switch (Category) {
227  case Keyword: // Often relevant, but misses most signals.
228  Score *= 4; // FIXME: important keywords should have specific boosts.
229  break;
230  case Type:
231  case Function:
232  case Variable:
233  Score *= 1.1f;
234  break;
235  case Namespace:
236  Score *= 0.8f;
237  break;
238  case Macro:
239  case Destructor:
240  case Operator:
241  Score *= 0.5f;
242  break;
243  case Constructor: // No boost constructors so they are after class types.
244  case Unknown:
245  break;
246  }
247 
248  return Score;
249 }
250 
251 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
252  const SymbolQualitySignals &S) {
253  OS << llvm::formatv("=== Symbol quality: {0}\n", S.evaluate());
254  OS << llvm::formatv("\tReferences: {0}\n", S.References);
255  OS << llvm::formatv("\tDeprecated: {0}\n", S.Deprecated);
256  OS << llvm::formatv("\tReserved name: {0}\n", S.ReservedName);
257  OS << llvm::formatv("\tCategory: {0}\n", static_cast<int>(S.Category));
258  return OS;
259 }
260 
262 computeScope(const NamedDecl *D) {
263  // Injected "Foo" within the class "Foo" has file scope, not class scope.
264  const DeclContext *DC = D->getDeclContext();
265  if (auto *R = dyn_cast_or_null<RecordDecl>(D))
266  if (R->isInjectedClassName())
267  DC = DC->getParent();
268  // Class constructor should have the same scope as the class.
269  if (isa<CXXConstructorDecl>(D))
270  DC = DC->getParent();
271  bool InClass = false;
272  for (; !DC->isFileContext(); DC = DC->getParent()) {
273  if (DC->isFunctionOrMethod())
275  InClass = InClass || DC->isRecord();
276  }
277  if (InClass)
279  // This threshold could be tweaked, e.g. to treat module-visible as global.
280  if (D->getLinkageInternal() < ExternalLinkage)
283 }
284 
286  // FIXME: Index results always assumed to be at global scope. If Scope becomes
287  // relevant to non-completion requests, we should recognize class members etc.
288 
289  SymbolURI = IndexResult.CanonicalDeclaration.FileURI;
290  SymbolScope = IndexResult.Scope;
291  IsInstanceMember |= isInstanceMember(IndexResult.SymInfo);
292 }
293 
294 void SymbolRelevanceSignals::merge(const CodeCompletionResult &SemaCCResult) {
295  if (SemaCCResult.Availability == CXAvailability_NotAvailable ||
296  SemaCCResult.Availability == CXAvailability_NotAccessible)
297  Forbidden = true;
298 
299  if (SemaCCResult.Declaration) {
300  SemaSaysInScope = true;
301  // We boost things that have decls in the main file. We give a fixed score
302  // for all other declarations in sema as they are already included in the
303  // translation unit.
304  float DeclProximity = (hasDeclInMainFile(*SemaCCResult.Declaration) ||
305  hasUsingDeclInMainFile(SemaCCResult))
306  ? 1.0
307  : 0.6;
308  SemaFileProximityScore = std::max(DeclProximity, SemaFileProximityScore);
309  IsInstanceMember |= isInstanceMember(SemaCCResult.Declaration);
310  InBaseClass |= SemaCCResult.InBaseClass;
311  }
312 
313  // Declarations are scoped, others (like macros) are assumed global.
314  if (SemaCCResult.Declaration)
315  Scope = std::min(Scope, computeScope(SemaCCResult.Declaration));
316 
317  NeedsFixIts = !SemaCCResult.FixIts.empty();
318 }
319 
320 static std::pair<float, unsigned> uriProximity(llvm::StringRef SymbolURI,
321  URIDistance *D) {
322  if (!D || SymbolURI.empty())
323  return {0.f, 0u};
324  unsigned Distance = D->distance(SymbolURI);
325  // Assume approximately default options are used for sensible scoring.
326  return {std::exp(Distance * -0.4f / FileDistanceOptions().UpCost), Distance};
327 }
328 
329 static float scopeBoost(ScopeDistance &Distance,
330  llvm::Optional<llvm::StringRef> SymbolScope) {
331  if (!SymbolScope)
332  return 1;
333  auto D = Distance.distance(*SymbolScope);
335  return 0.6f;
336  return std::max(0.65, 2.0 * std::pow(0.6, D / 2.0));
337 }
338 
340  float Score = 1;
341 
342  if (Forbidden)
343  return 0;
344 
345  Score *= NameMatch;
346 
347  // File proximity scores are [0,1] and we translate them into a multiplier in
348  // the range from 1 to 3.
349  Score *= 1 + 2 * std::max(uriProximity(SymbolURI, FileProximityMatch).first,
350  SemaFileProximityScore);
351 
352  if (ScopeProximityMatch)
353  // Use a constant scope boost for sema results, as scopes of sema results
354  // can be tricky (e.g. class/function scope). Set to the max boost as we
355  // don't load top-level symbols from the preamble and sema results are
356  // always in the accessible scope.
357  Score *=
358  SemaSaysInScope ? 2.0 : scopeBoost(*ScopeProximityMatch, SymbolScope);
359 
360  // Symbols like local variables may only be referenced within their scope.
361  // Conversely if we're in that scope, it's likely we'll reference them.
362  if (Query == CodeComplete) {
363  // The narrower the scope where a symbol is visible, the more likely it is
364  // to be relevant when it is available.
365  switch (Scope) {
366  case GlobalScope:
367  break;
368  case FileScope:
369  Score *= 1.5;
370  break;
371  case ClassScope:
372  Score *= 2;
373  break;
374  case FunctionScope:
375  Score *= 4;
376  break;
377  }
378  }
379 
380  if (TypeMatchesPreferred)
381  Score *= 5.0;
382 
383  // Penalize non-instance members when they are accessed via a class instance.
384  if (!IsInstanceMember &&
385  (Context == CodeCompletionContext::CCC_DotMemberAccess ||
386  Context == CodeCompletionContext::CCC_ArrowMemberAccess)) {
387  Score *= 0.2f;
388  }
389 
390  if (InBaseClass)
391  Score *= 0.5f;
392 
393  // Penalize for FixIts.
394  if (NeedsFixIts)
395  Score *= 0.5f;
396 
397  return Score;
398 }
399 
400 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
401  const SymbolRelevanceSignals &S) {
402  OS << llvm::formatv("=== Symbol relevance: {0}\n", S.evaluate());
403  OS << llvm::formatv("\tName match: {0}\n", S.NameMatch);
404  OS << llvm::formatv("\tForbidden: {0}\n", S.Forbidden);
405  OS << llvm::formatv("\tNeedsFixIts: {0}\n", S.NeedsFixIts);
406  OS << llvm::formatv("\tIsInstanceMember: {0}\n", S.IsInstanceMember);
407  OS << llvm::formatv("\tContext: {0}\n", getCompletionKindString(S.Context));
408  OS << llvm::formatv("\tQuery type: {0}\n", static_cast<int>(S.Query));
409  OS << llvm::formatv("\tScope: {0}\n", static_cast<int>(S.Scope));
410 
411  OS << llvm::formatv("\tSymbol URI: {0}\n", S.SymbolURI);
412  OS << llvm::formatv("\tSymbol scope: {0}\n",
413  S.SymbolScope ? *S.SymbolScope : "<None>");
414 
415  if (S.FileProximityMatch) {
416  auto Score = uriProximity(S.SymbolURI, S.FileProximityMatch);
417  OS << llvm::formatv("\tIndex URI proximity: {0} (distance={1})\n",
418  Score.first, Score.second);
419  }
420  OS << llvm::formatv("\tSema file proximity: {0}\n", S.SemaFileProximityScore);
421 
422  OS << llvm::formatv("\tSema says in scope: {0}\n", S.SemaSaysInScope);
423  if (S.ScopeProximityMatch)
424  OS << llvm::formatv("\tIndex scope boost: {0}\n",
426 
427  OS << llvm::formatv(
428  "\tType matched preferred: {0} (Context type: {1}, Symbol type: {2}\n",
430 
431  return OS;
432 }
433 
/// Combines a quality score and a relevance score into one value by
/// multiplying them.
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance) {
  float Combined = SymbolQuality;
  Combined *= SymbolRelevance;
  return Combined;
}
437 
438 // Produces an integer that sorts in the same order as F.
439 // That is: a < b <==> encodeFloat(a) < encodeFloat(b).
440 static uint32_t encodeFloat(float F) {
441  static_assert(std::numeric_limits<float>::is_iec559, "");
442  constexpr uint32_t TopBit = ~(~uint32_t{0} >> 1);
443 
444  // Get the bits of the float. Endianness is the same as for integers.
445  uint32_t U = llvm::FloatToBits(F);
446  // IEEE 754 floats compare like sign-magnitude integers.
447  if (U & TopBit) // Negative float.
448  return 0 - U; // Map onto the low half of integers, order reversed.
449  return U + TopBit; // Positive floats map onto the high half of integers.
450 }
451 
452 std::string sortText(float Score, llvm::StringRef Name) {
453  // We convert -Score to an integer, and hex-encode for readability.
454  // Example: [0.5, "foo"] -> "41000000foo"
455  std::string S;
456  llvm::raw_string_ostream OS(S);
457  llvm::write_hex(OS, encodeFloat(-Score), llvm::HexPrintStyle::Lower,
458  /*Width=*/2 * sizeof(Score));
459  OS << Name;
460  OS.flush();
461  return S;
462 }
463 
464 llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
465  const SignatureQualitySignals &S) {
466  OS << llvm::formatv("=== Signature Quality:\n");
467  OS << llvm::formatv("\tNumber of parameters: {0}\n", S.NumberOfParameters);
468  OS << llvm::formatv("\tNumber of optional parameters: {0}\n",
470  OS << llvm::formatv("\tContains active parameter: {0}\n",
472  OS << llvm::formatv("\tKind: {0}\n", S.Kind);
473  return OS;
474 }
475 
476 } // namespace clangd
477 } // namespace clang
SourceLocation Loc
'#' location in the include directive
static SymbolQualitySignals::SymbolCategory categorize(const NamedDecl &ND)
Definition: Quality.cpp:63
void merge(const CodeCompletionResult &SemaCCResult)
Definition: Quality.cpp:181
static float scopeBoost(ScopeDistance &Distance, llvm::Optional< llvm::StringRef > SymbolScope)
Definition: Quality.cpp:329
unsigned distance(llvm::StringRef SymbolScope)
llvm::APSInt Lower
llvm::Optional< llvm::StringRef > SymbolScope
Definition: Quality.h:106
static uint32_t encodeFloat(float F)
Definition: Quality.cpp:440
enum clang::clangd::SymbolQualitySignals::SymbolCategory Category
void merge(const CodeCompletionResult &SemaResult)
Definition: Quality.cpp:294
std::string sortText(float Score, llvm::StringRef Name)
Returns a string that sorts in the same order as (-Score, Tiebreak), for LSP.
Definition: Quality.cpp:452
static constexpr unsigned Unreachable
Definition: FileDistance.h:74
static bool isReserved(llvm::StringRef Name)
Definition: Quality.cpp:36
CodeCompletionContext::Kind Context
Definition: Quality.h:123
llvm::StringRef Scope
Definition: Index.h:168
Attributes of a symbol that affect how much we like it.
Definition: Quality.h:57
enum clang::clangd::SymbolRelevanceSignals::AccessibleScope Scope
CodeCompleteConsumer::OverloadCandidate::CandidateKind Kind
Definition: Quality.h:195
unsigned References
Definition: Index.h:183
index::SymbolInfo SymInfo
Definition: Index.h:164
bool NeedsFixIts
Whether fixits needs to be applied for that completion or not.
Definition: Quality.h:92
enum clang::clangd::SymbolRelevanceSignals::QueryType Query
clang::find_all_symbols::SymbolInfo SymbolInfo
#define MAP(DeclType, Category)
SymbolFlag Flags
Definition: Index.h:248
*that are placed right before the argument **code *void f(bool foo)
Checks that argument comments match parameter names.
static constexpr llvm::StringLiteral Name
SymbolLocation CanonicalDeclaration
Definition: Index.h:180
const Decl * D
Definition: XRefs.cpp:79
const Symbol * IndexResult
A context is an immutable container for per-request data that must be propagated through layers that ...
Definition: Context.h:70
static bool hasDeclInMainFile(const Decl &D)
Definition: Quality.cpp:42
unsigned distance(llvm::StringRef URI)
llvm::StringRef SymbolURI
These are used to calculate proximity between the index symbol and the query.
Definition: Quality.h:98
float evaluateSymbolAndRelevance(float SymbolQuality, float SymbolRelevance)
Combine symbol quality and relevance into a single score.
Definition: Quality.cpp:434
static bool isInstanceMember(const NamedDecl *ND)
Definition: Quality.cpp:160
//===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
Support lookups like FileDistance, but the lookup keys are symbol scopes.
Definition: FileDistance.h:117
llvm::StringRef Name
Definition: Index.h:166
float SemaFileProximityScore
FIXME: unify with index proximity score - signals should be source-independent.
Definition: Quality.h:102
static SymbolRelevanceSignals::AccessibleScope computeScope(const NamedDecl *D)
Definition: Quality.cpp:262
static std::pair< float, unsigned > uriProximity(llvm::StringRef SymbolURI, URIDistance *D)
Definition: Quality.cpp:320
bool isImplementationDetail(const Decl *D)
Returns true if the declaration is considered implementation detail based on heuristics.
Definition: AST.cpp:44
float NameMatch
0-1+ fuzzy-match score for unqualified name. Must be explicitly assigned.
Definition: Quality.h:89
Indicates if the symbol is deprecated.
Definition: Index.h:241
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const CodeCompletion &C)
const char * FileURI
Definition: Index.h:72
Attributes of a symbol-query pair that affect how much we like it.
Definition: Quality.h:87
static bool hasUsingDeclInMainFile(const CodeCompletionResult &R)
Definition: Quality.cpp:52