18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Support/ScopedPrinter.h" 29 auto Data = std::make_pair(std::move(Symbols), std::move(Refs));
30 return llvm::make_unique<Dex>(Data.first, Data.second, std::move(Data), Size);
36 const Token RestrictedForCodeCompletion =
48 std::vector<Token> generateSearchTokens(
const Symbol &Sym) {
53 for (
const auto &ProximityURI :
57 Result.emplace_back(RestrictedForCodeCompletion);
62 std::unique_ptr<Iterator> createFileProximityIterator(
63 llvm::ArrayRef<std::string> ProximityPaths,
64 const llvm::DenseMap<Token, PostingList> &InvertedIndex,
66 std::vector<std::unique_ptr<Iterator>> BoostingIterators;
68 llvm::StringSet<> ParentURIs;
69 llvm::StringMap<SourceParams> Sources;
70 for (
const auto &
Path : ProximityPaths) {
74 for (
const auto &ProximityURI : PathProximityURIs)
75 ParentURIs.insert(ProximityURI);
87 for (
const auto &ParentURI : ParentURIs.keys()) {
89 const auto It = InvertedIndex.find(Tok);
90 if (It != InvertedIndex.end()) {
92 PathProximitySignals.
SymbolURI = ParentURI;
93 BoostingIterators.push_back(Corpus.
boost(
94 It->second.iterator(&It->first), PathProximitySignals.
evaluate()));
97 BoostingIterators.push_back(Corpus.
all());
98 return Corpus.
unionOf(std::move(BoostingIterators));
103 void Dex::buildIndex() {
105 std::vector<std::pair<float, const Symbol *>> ScoredSymbols(Symbols.size());
107 for (
size_t I = 0; I < Symbols.size(); ++I) {
108 const Symbol *Sym = Symbols[I];
109 LookupTable[Sym->
ID] = Sym;
110 ScoredSymbols[I] = {
quality(*Sym), Sym};
115 llvm::sort(ScoredSymbols, std::greater<std::pair<float, const Symbol *>>());
118 SymbolQuality.resize(Symbols.size());
120 for (
size_t I = 0; I < ScoredSymbols.size(); ++I) {
121 SymbolQuality[I] = ScoredSymbols[I].first;
122 Symbols[I] = ScoredSymbols[I].second;
126 llvm::DenseMap<Token, std::vector<DocID>> TempInvertedIndex;
127 for (
DocID SymbolRank = 0; SymbolRank < Symbols.size(); ++SymbolRank) {
128 const auto *Sym = Symbols[SymbolRank];
129 for (
const auto &
Token : generateSearchTokens(*Sym))
130 TempInvertedIndex[
Token].push_back(SymbolRank);
134 for (
const auto &TokenToPostingList : TempInvertedIndex)
135 InvertedIndex.insert(
136 {TokenToPostingList.first,
PostingList(TokenToPostingList.second)});
139 std::unique_ptr<Iterator> Dex::iterator(
const Token &Tok)
const {
140 auto It = InvertedIndex.find(Tok);
141 return It == InvertedIndex.end() ?
Corpus.
none()
142 : It->second.iterator(&It->first);
150 assert(!StringRef(Req.
Query).contains(
"::") &&
151 "There must be no :: in query.");
156 bool More = !Req.
Query.empty() && Req.
Query.size() < 3;
158 std::vector<std::unique_ptr<Iterator>> Criteria;
163 std::vector<std::unique_ptr<Iterator>> TrigramIterators;
164 for (
const auto &Trigram : TrigramTokens)
165 TrigramIterators.push_back(iterator(Trigram));
169 std::vector<std::unique_ptr<Iterator>> ScopeIterators;
170 for (
const auto &Scope : Req.
Scopes)
173 ScopeIterators.push_back(
182 Criteria.push_back(iterator(RestrictedForCodeCompletion));
192 SPAN_ATTACH(Tracer,
"query", llvm::to_string(*Root));
193 vlog(
"Dex query tree: {0}", *Root);
195 using IDAndScore = std::pair<DocID, float>;
196 std::vector<IDAndScore> IDAndScores =
consume(*Root);
198 auto Compare = [](
const IDAndScore &LHS,
const IDAndScore &RHS) {
199 return LHS.second > RHS.second;
202 Req.
Limit ? *Req.
Limit : std::numeric_limits<size_t>::max(), Compare);
203 for (
const auto &IDAndScore : IDAndScores) {
204 const DocID SymbolDocID = IDAndScore.first;
205 const auto *Sym = Symbols[SymbolDocID];
206 const llvm::Optional<float> Score = Filter.
match(Sym->Name);
211 const float FinalScore =
212 (*Score) * SymbolQuality[SymbolDocID] * IDAndScore.second;
215 if (Top.
push({SymbolDocID, FinalScore}))
221 for (
const auto &Item : std::move(Top).items())
229 for (
const auto &ID : Req.
IDs) {
230 auto I = LookupTable.find(ID);
231 if (I != LookupTable.end())
237 llvm::function_ref<
void(
const Ref &)>
Callback)
const {
240 Req.
Limit.getValueOr(std::numeric_limits<uint32_t>::max());
241 for (
const auto &ID : Req.
IDs)
242 for (
const auto &
Ref : Refs.lookup(ID)) {
243 if (Remaining > 0 && static_cast<int>(Req.
Filter &
Ref.
Kind)) {
251 size_t Bytes = Symbols.size() *
sizeof(
const Symbol *);
252 Bytes += SymbolQuality.size() *
sizeof(float);
253 Bytes += LookupTable.getMemorySize();
254 Bytes += InvertedIndex.getMemorySize();
255 for (
const auto &TokenToPostingList : InvertedIndex)
256 Bytes += TokenToPostingList.second.bytes();
257 Bytes += Refs.getMemorySize();
258 return Bytes + BackingDataSize;
262 std::vector<std::string>
Result;
265 "Non-empty argument of generateProximityURIs() should be a valid " 267 llvm::StringRef Body = ParsedURI->body();
276 Result.emplace_back(ParsedURI->toString());
277 while (!Body.empty() && --Limit > 0) {
280 Body = llvm::sys::path::parent_path(Body, llvm::sys::path::Style::posix);
281 URI TokenURI(ParsedURI->scheme(), ParsedURI->authority(), Body);
283 Result.emplace_back(TokenURI.toString());
llvm::DenseSet< SymbolID > IDs
bool AnyScope
If set to true, allow symbols from any scope.
std::unique_ptr< Iterator > intersect(std::vector< std::unique_ptr< Iterator >> Children) const
Returns AND Iterator which performs the intersection of the PostingLists of its children.
void refs(const RefsRequest &Req, llvm::function_ref< void(const Ref &)> Callback) const override
Finds all occurrences (e.g.
bool RestrictForCodeCompletion
If set to true, only symbols for completion support will be considered.
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
PostingList is the storage of DocIDs which can be inserted to the Query Tree as a leaf by constructin...
llvm::DenseSet< SymbolID > IDs
llvm::unique_function< void(llvm::Expected< T >)> Callback
A Callback<T> is a void function that accepts Expected<T>.
std::vector< Token > generateIdentifierTrigrams(llvm::StringRef Identifier)
Returns list of unique fuzzy-search trigrams from unqualified symbol.
bool fuzzyFind(const FuzzyFindRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Constructs iterators over tokens extracted from the query and exhausts it while applying Callback to ...
void vlog(const char *Fmt, Ts &&... Vals)
std::vector< std::string > Scopes
If this is non-empty, symbols must be in at least one of the scopes (e.g.
A Token represents an attribute of a symbol, such as a particular trigram present in the name (used f...
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
std::unique_ptr< Iterator > unionOf(std::vector< std::unique_ptr< Iterator >> Children) const
Returns OR Iterator which performs the union of the PostingLists of its children. ...
URIDistance * FileProximityMatch
Whether or not this symbol is meant to be used for code completion.
std::vector< std::string > generateProximityURIs(llvm::StringRef URIPath)
Returns Search Token for a number of parent directories of given Path.
std::vector< Token > generateQueryTrigrams(llvm::StringRef Query)
Returns list of unique fuzzy-search trigrams given a query.
std::unique_ptr< Iterator > limit(std::unique_ptr< Iterator > Child, size_t Limit) const
Returns LIMIT iterator, which yields up to N elements of its child iterator.
static std::unique_ptr< SymbolIndex > build(SymbolSlab, RefSlab)
Builds an index from slabs. The index takes ownership of the slab.
std::unique_ptr< Iterator > none() const
Returns FALSE Iterator which iterates over no documents.
std::string Path
A typedef to represent a file path.
std::string Query
A query string for the fuzzy find.
llvm::Optional< float > match(llvm::StringRef Word)
SymbolLocation CanonicalDeclaration
bool push(value_type &&V)
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
void lookup(const LookupRequest &Req, llvm::function_ref< void(const Symbol &)> Callback) const override
Looks up symbols with any of the given symbol IDs and applies Callback on each matched symbol...
llvm::StringRef SymbolURI
These are used to calculate proximity between the index symbol and the query.
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
===-- Representation.cpp - ClangDoc Representation -----------*- C++ -*-===//
std::vector< std::string > ProximityPaths
Contextually relevant files (e.g.
Path Proximity URI to symbol declaration.
llvm::Optional< uint32_t > Limit
If set, limit the number of references returned from the index.
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
A URI describes the location of a source file.
llvm::Optional< uint32_t > Limit
The number of top candidates to return.
Internal Token type for invalid/special tokens, e.g.
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
std::unique_ptr< Iterator > boost(std::unique_ptr< Iterator > Child, float Factor) const
Returns BOOST iterator which multiplies the score of each item by given factor.
std::unique_ptr< Iterator > all() const
Returns TRUE Iterator which iterates over "virtual" PostingList containing all items in range [0...
size_t estimateMemoryUsage() const override
Returns estimated size of index (in bytes).
Records an event whose duration is the lifetime of the Span object.
Attributes of a symbol-query pair that affect how much we like it.
#define SPAN_ATTACH(S, Name, Expr)
Attach a key-value pair to a Span event.
float quality(const Symbol &S)
TopN<T> is a lossy container that preserves only the "best" N elements.