clang-tools  8.0.0
Iterator.h
Go to the documentation of this file.
1 //===--- Iterator.h - Query Symbol Retrieval --------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 ///
10 /// \file
11 /// Symbol index queries consist of specific requirements for the requested
12 /// symbol, such as high fuzzy matching score, scope, type etc. The lists of all
13 /// symbols matching some criteria (e.g. belonging to "clang::clangd::" scope)
14 /// are expressed in a form of Search Tokens which are stored in the inverted
15 /// index. Inverted index maps these tokens to the posting lists - sorted (by
16 /// symbol quality) sequences of symbol IDs matching the token, e.g. scope token
17 /// "clangd::clangd::" is mapped to the list of IDs of all symbols which are
18 /// declared in this namespace. Search queries are build from a set of
19 /// requirements which can be combined with each other forming the query trees.
20 /// The leafs of such trees are posting lists, and the nodes are operations on
21 /// these posting lists, e.g. intersection or union. Efficient processing of
22 /// these multi-level queries is handled by Iterators. Iterators advance through
23 /// all leaf posting lists producing the result of search query, which preserves
24 /// the sorted order of IDs. Having the resulting IDs sorted is important,
25 /// because it allows receiving a certain number of the most valuable items
26 /// (e.g. symbols with highest quality which was the sorting key in the first
27 /// place) without processing all items with requested properties (this might
28 /// not be computationally effective if search request is not very restrictive).
29 //
30 //===----------------------------------------------------------------------===//
31 
32 #ifndef LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
33 #define LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
34 
35 #include "llvm/ADT/ArrayRef.h"
36 #include "llvm/Support/raw_ostream.h"
37 #include <algorithm>
38 #include <memory>
39 #include <vector>
40 
41 namespace clang {
42 namespace clangd {
43 namespace dex {
44 
45 /// Symbol position in the list of all index symbols sorted by a pre-computed
46 /// symbol quality.
47 using DocID = uint32_t;
48 
49 /// Iterator is the interface for Query Tree node. The simplest type of Iterator
50 /// is DocumentIterator which is simply a wrapper around PostingList iterator
51 /// and serves as the Query Tree leaf. More sophisticated examples of iterators
52 /// can manage intersection, union of the elements produced by other iterators
53 /// (their children) to form a multi-level Query Tree. The interface is designed
54 /// to be extensible in order to support multiple types of iterators.
55 class Iterator {
56 public:
57  /// Returns true if all valid DocIDs were processed and hence the iterator is
58  /// exhausted.
59  virtual bool reachedEnd() const = 0;
60  /// Moves to next valid DocID. If it doesn't exist, the iterator is exhausted
61  /// and proceeds to the END.
62  ///
63  /// Note: reachedEnd() must be false.
64  virtual void advance() = 0;
65  /// Moves to the first valid DocID which is equal or higher than given ID. If
66  /// it doesn't exist, the iterator is exhausted and proceeds to the END.
67  ///
68  /// Note: reachedEnd() must be false.
69  virtual void advanceTo(DocID ID) = 0;
70  /// Returns the current element this iterator points to.
71  ///
72  /// Note: reachedEnd() must be false.
73  virtual DocID peek() const = 0;
74  /// Informs the iterator that the current document was consumed, and returns
75  /// its boost.
76  ///
77  /// Note: If this iterator has any child iterators that contain the document,
78  /// consume() should be called on those and their boosts incorporated.
79  /// consume() must *not* be called on children that don't contain the current
80  /// doc.
81  virtual float consume() = 0;
82  /// Returns an estimate of advance() calls before the iterator is exhausted.
83  virtual size_t estimateSize() const = 0;
84 
85  virtual ~Iterator() {}
86 
87  /// Prints a convenient human-readable iterator representation by recursively
88  /// dumping iterators in the following format:
89  ///
90  /// (Type Child1 Child2 ...)
91  ///
92  /// Where Type is the iterator type representation: "&" for And, "|" for Or,
93  /// ChildN is N-th iterator child. Raw iterators over PostingList are
94  /// represented as "[... CurID ...]" where CurID is the current PostingList
95  /// entry being inspected.
96  friend llvm::raw_ostream &operator<<(llvm::raw_ostream &OS,
97  const Iterator &Iterator) {
98  return Iterator.dump(OS);
99  }
100 
101  /// Inspect iterator type, used internally for optimizing query trees.
102  enum class Kind { And, Or, True, False, Other };
103  Kind kind() const { return MyKind; }
104 
105 protected:
106  Iterator(Kind MyKind = Kind::Other) : MyKind(MyKind) {}
107 
108 private:
109  virtual llvm::raw_ostream &dump(llvm::raw_ostream &OS) const = 0;
110  Kind MyKind;
111 };
112 
113 /// Advances the iterator until it is exhausted. Returns pairs of document IDs
114 /// with the corresponding boosting score.
115 ///
116 /// Boosting can be seen as a compromise between retrieving too many items and
117 /// calculating finals score for each of them (which might be very expensive)
118 /// and not retrieving enough items so that items with very high final score
119 /// would not be processed. Boosting score is a computationally efficient way
120 /// to acquire preliminary scores of requested items.
121 std::vector<std::pair<DocID, float>> consume(Iterator &It);
122 
123 namespace detail {
124 // Variadic template machinery.
125 inline void populateChildren(std::vector<std::unique_ptr<Iterator>> &) {}
126 template <typename... TailT>
127 void populateChildren(std::vector<std::unique_ptr<Iterator>> &Children,
128  std::unique_ptr<Iterator> Head, TailT... Tail) {
129  Children.push_back(move(Head));
130  populateChildren(Children, move(Tail)...);
131 }
132 } // namespace detail
133 
134 // A corpus is a set of documents, and a factory for iterators over them.
135 class Corpus {
136  DocID Size;
137 
138 public:
139  explicit Corpus(DocID Size) : Size(Size) {}
140 
141  /// Returns AND Iterator which performs the intersection of the PostingLists
142  /// of its children.
143  ///
144  /// consume(): AND Iterator returns the product of Childrens' boosting
145  /// scores.
146  std::unique_ptr<Iterator>
147  intersect(std::vector<std::unique_ptr<Iterator>> Children) const;
148 
149  /// Returns OR Iterator which performs the union of the PostingLists of its
150  /// children.
151  ///
152  /// consume(): OR Iterator returns the highest boost value among children
153  /// containing the requested item.
154  std::unique_ptr<Iterator>
155  unionOf(std::vector<std::unique_ptr<Iterator>> Children) const;
156 
157  /// Returns TRUE Iterator which iterates over "virtual" PostingList
158  /// containing all items in range [0, Size) in an efficient manner.
159  std::unique_ptr<Iterator> all() const;
160 
161  /// Returns FALSE Iterator which iterates over no documents.
162  std::unique_ptr<Iterator> none() const;
163 
164  /// Returns BOOST iterator which multiplies the score of each item by given
165  /// factor. Boosting can be used as a computationally inexpensive filtering.
166  /// Users can return significantly more items using consumeAndBoost() and
167  /// then trim Top K using retrieval score.
168  std::unique_ptr<Iterator> boost(std::unique_ptr<Iterator> Child,
169  float Factor) const;
170 
171  /// Returns LIMIT iterator, which yields up to N elements of its child
172  /// iterator. Elements only count towards the limit if they are part of the
173  /// final result set. Therefore the following iterator (AND (2) (LIMIT (1 2)
174  /// 1)) yields (2), not ().
175  std::unique_ptr<Iterator> limit(std::unique_ptr<Iterator> Child,
176  size_t Limit) const;
177 
178  /// This allows intersect(create(...), create(...)) syntax.
179  template <typename... Args>
180  std::unique_ptr<Iterator> intersect(Args... args) const {
181  std::vector<std::unique_ptr<Iterator>> Children;
182  detail::populateChildren(Children, std::forward<Args>(args)...);
183  return intersect(move(Children));
184  }
185 
186  /// This allows unionOf(create(...), create(...)) syntax.
187  template <typename... Args>
188  std::unique_ptr<Iterator> unionOf(Args... args) const {
189  std::vector<std::unique_ptr<Iterator>> Children;
190  detail::populateChildren(Children, std::forward<Args>(args)...);
191  return unionOf(move(Children));
192  }
193 };
194 
195 } // namespace dex
196 } // namespace clangd
197 } // namespace clang
198 
199 #endif // LLVM_CLANG_TOOLS_EXTRA_CLANGD_INDEX_DEX_ITERATOR_H
virtual float consume()=0
Informs the iterator that the current document was consumed, and returns its boost.
Iterator is the interface for Query Tree node.
Definition: Iterator.h:55
virtual DocID peek() const =0
Returns the current element this iterator points to.
Kind
Inspect iterator type, used internally for optimizing query trees.
Definition: Iterator.h:102
virtual size_t estimateSize() const =0
Returns an estimate of advance() calls before the iterator is exhausted.
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:47
virtual void advance()=0
Moves to next valid DocID.
void populateChildren(std::vector< std::unique_ptr< Iterator >> &)
Definition: Iterator.h:125
Iterator(Kind MyKind=Kind::Other)
Definition: Iterator.h:106
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
virtual void advanceTo(DocID ID)=0
Moves to the first valid DocID which is equal or higher than given ID.
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
void populateChildren(std::vector< std::unique_ptr< Iterator >> &Children, std::unique_ptr< Iterator > Head, TailT... Tail)
Definition: Iterator.h:127
std::unique_ptr< Iterator > intersect(Args... args) const
This allows intersect(create(...), create(...)) syntax.
Definition: Iterator.h:180
std::unique_ptr< Iterator > unionOf(Args... args) const
This allows unionOf(create(...), create(...)) syntax.
Definition: Iterator.h:188
friend llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, const Iterator &Iterator)
Prints a convenient human-readable iterator representation by recursively dumping iterators in the fo...
Definition: Iterator.h:96