clang-tools  8.0.0
Dex.h
Go to the documentation of this file.
1 //===--- Dex.h - Dex Symbol Index Implementation ----------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// This defines Dex - a symbol index implementation based on query iterators
12 /// over symbol tokens, such as fuzzy matching trigrams, scopes, types, etc.
13 /// While consuming more memory and having longer build stage due to
14 /// preprocessing, Dex will have substantially lower latency. It will also allow
15 /// efficient symbol searching which is crucial for operations like code
16 /// completion, and can be very important for a number of different code
17 /// transformations which will be eventually supported by Clangd.
18 ///
19 //===----------------------------------------------------------------------===//
20 
21 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
22 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
23 
24 #include "Iterator.h"
25 #include "PostingList.h"
26 #include "Token.h"
27 #include "Trigram.h"
28 #include "index/Index.h"
29 #include "index/MemIndex.h"
30 #include "index/SymbolCollector.h"
31 
32 namespace clang {
33 namespace clangd {
34 namespace dex {
35 
36 /// In-memory Dex trigram-based index implementation.
37 // FIXME(kbobyrev): Introduce serialization and deserialization of the symbol
38 // index so that it can be loaded from the disk. Since static index is not
39 // changed frequently, it's safe to assume that it has to be built only once
40 // (when the clangd process starts). Therefore, it can be easier to store built
41 // index on disk and then load it if available.
42 class Dex : public SymbolIndex {
43 public:
44  // All data must outlive this index.
45  template <typename SymbolRange, typename RefsRange>
46  Dex(SymbolRange &&Symbols, RefsRange &&Refs) : Corpus(0) {
47  for (auto &&Sym : Symbols)
48  this->Symbols.push_back(&Sym);
49  for (auto &&Ref : Refs)
50  this->Refs.try_emplace(Ref.first, Ref.second);
51  buildIndex();
52  }
53  // Symbols and Refs are owned by BackingData, Index takes ownership.
54  template <typename SymbolRange, typename RefsRange, typename Payload>
55  Dex(SymbolRange &&Symbols, RefsRange &&Refs, Payload &&BackingData,
56  size_t BackingDataSize)
57  : Dex(std::forward<SymbolRange>(Symbols), std::forward<RefsRange>(Refs)) {
58  KeepAlive = std::shared_ptr<void>(
59  std::make_shared<Payload>(std::move(BackingData)), nullptr);
60  this->BackingDataSize = BackingDataSize;
61  }
62 
63  /// Builds an index from slabs. The index takes ownership of the slab.
64  static std::unique_ptr<SymbolIndex> build(SymbolSlab, RefSlab);
65 
66  bool
67  fuzzyFind(const FuzzyFindRequest &Req,
68  llvm::function_ref<void(const Symbol &)> Callback) const override;
69 
70  void lookup(const LookupRequest &Req,
71  llvm::function_ref<void(const Symbol &)> Callback) const override;
72 
73  void refs(const RefsRequest &Req,
74  llvm::function_ref<void(const Ref &)> Callback) const override;
75 
76  size_t estimateMemoryUsage() const override;
77 
78 private:
79  void buildIndex();
80  std::unique_ptr<Iterator> iterator(const Token &Tok) const;
81 
82  /// Stores symbols sorted in the descending order of symbol quality..
83  std::vector<const Symbol *> Symbols;
84  /// SymbolQuality[I] is the quality of Symbols[I].
85  std::vector<float> SymbolQuality;
86  llvm::DenseMap<SymbolID, const Symbol *> LookupTable;
87  /// Inverted index is a mapping from the search token to the posting list,
88  /// which contains all items which can be characterized by such search token.
89  /// For example, if the search token is scope "std::", the corresponding
90  /// posting list would contain all indices of symbols defined in namespace
91  /// std. Inverted index is used to retrieve posting lists which are processed
92  /// during the fuzzyFind process.
93  llvm::DenseMap<Token, PostingList> InvertedIndex;
95  llvm::DenseMap<SymbolID, llvm::ArrayRef<Ref>> Refs;
96  std::shared_ptr<void> KeepAlive; // poor man's move-only std::any
97  // Size of memory retained by KeepAlive.
98  size_t BackingDataSize = 0;
99 };
100 
101 /// Returns search tokens for several parent directories of the given URIPath.
102 /// Should be used within the index build process.
103 ///
104 /// This function is exposed for testing only.
105 std::vector<std::string> generateProximityURIs(llvm::StringRef URIPath);
106 
107 } // namespace dex
108 } // namespace clangd
109 } // namespace clang
110 
111 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_DEX_H
Dex(SymbolRange &&Symbols, RefsRange &&Refs)
Definition: Dex.h:46
void refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const override
Finds all occurrences (e.g.
Definition: Dex.cpp:236
Interface for symbol indexes that can be used for searching or matching symbols among a set of symbol...
Definition: Index.h:487
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
Definition: Function.h:29
bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Constructs iterators over tokens extracted from the query and exhausts it while applying Callback to ...
Definition: Dex.cpp:148
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
Definition: Token.h:41
std::vector< std::string > generateProximityURIs(llvm::StringRef URIPath)
Returns Search Token for a number of parent directories of given Path.
Definition: Dex.cpp:261
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab)
Builds an index from slabs. The index takes ownership of the slab.
Definition: Dex.cpp:27
void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol...
Definition: Dex.cpp:226
This defines posting list interface: a storage for identifiers of symbols which can be characterized ...
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
Trigrams are attributes of the symbol unqualified name used to effectively extract symbols which can ...
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
In-memory Dex trigram-based index implementation.
Definition: Dex.h:42
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Definition: Dex.cpp:250
Token objects represent a characteristic of a symbol, which can be used to perform efficient search...
Dex(SymbolRange &&Symbols, RefsRange &&Refs, Payload &&BackingData, size_t BackingDataSize)
Definition: Dex.h:55