clang-tools  8.0.0
Iterator.cpp
Go to the documentation of this file.
1 //===--- Iterator.cpp - Query Symbol Retrieval ------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "Iterator.h"
11 #include "llvm/Support/Casting.h"
12 #include <algorithm>
13 #include <cassert>
14 #include <numeric>
15 
16 namespace clang {
17 namespace clangd {
18 namespace dex {
19 namespace {
20 
21 /// Implements Iterator over the intersection of other iterators.
22 ///
23 /// AndIterator iterates through common items among all children. It becomes
24 /// exhausted as soon as any child becomes exhausted. After each mutation, the
25 /// iterator restores the invariant: all children must point to the same item.
26 class AndIterator : public Iterator {
27 public:
28  explicit AndIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
29  : Iterator(Kind::And), Children(std::move(AllChildren)) {
30  assert(!Children.empty() && "AND iterator should have at least one child.");
31  // Establish invariants.
32  for (const auto &Child : Children)
33  ReachedEnd |= Child->reachedEnd();
34  sync();
35  // When children are sorted by the estimateSize(), sync() calls are more
36  // effective. Each sync() starts with the first child and makes sure all
37  // children point to the same element. If any child is "above" the previous
38  // ones, the algorithm resets and and advances the children to the next
39  // highest element starting from the front. When child iterators in the
40  // beginning have smaller estimated size, the sync() will have less restarts
41  // and become more effective.
42  llvm::sort(Children, [](const std::unique_ptr<Iterator> &LHS,
43  const std::unique_ptr<Iterator> &RHS) {
44  return LHS->estimateSize() < RHS->estimateSize();
45  });
46  }
47 
48  bool reachedEnd() const override { return ReachedEnd; }
49 
50  /// Advances all children to the next common item.
51  void advance() override {
52  assert(!reachedEnd() && "AND iterator can't advance() at the end.");
53  Children.front()->advance();
54  sync();
55  }
56 
57  /// Advances all children to the next common item with DocumentID >= ID.
58  void advanceTo(DocID ID) override {
59  assert(!reachedEnd() && "AND iterator can't advanceTo() at the end.");
60  Children.front()->advanceTo(ID);
61  sync();
62  }
63 
64  DocID peek() const override { return Children.front()->peek(); }
65 
66  float consume() override {
67  assert(!reachedEnd() && "AND iterator can't consume() at the end.");
68  float Boost = 1;
69  for (const auto &Child : Children)
70  Boost *= Child->consume();
71  return Boost;
72  }
73 
74  size_t estimateSize() const override {
75  return Children.front()->estimateSize();
76  }
77 
78 private:
79  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
80  OS << "(& ";
81  auto Separator = "";
82  for (const auto &Child : Children) {
83  OS << Separator << *Child;
84  Separator = " ";
85  }
86  OS << ')';
87  return OS;
88  }
89 
90  /// Restores class invariants: each child will point to the same element after
91  /// sync.
92  void sync() {
93  ReachedEnd |= Children.front()->reachedEnd();
94  if (ReachedEnd)
95  return;
96  auto SyncID = Children.front()->peek();
97  // Indicates whether any child needs to be advanced to new SyncID.
98  bool NeedsAdvance = false;
99  do {
100  NeedsAdvance = false;
101  for (auto &Child : Children) {
102  Child->advanceTo(SyncID);
103  ReachedEnd |= Child->reachedEnd();
104  // If any child reaches end And iterator can not match any other items.
105  // In this case, just terminate the process.
106  if (ReachedEnd)
107  return;
108  // If any child goes beyond given ID (i.e. ID is not the common item),
109  // all children should be advanced to the next common item.
110  if (Child->peek() > SyncID) {
111  SyncID = Child->peek();
112  NeedsAdvance = true;
113  }
114  }
115  } while (NeedsAdvance);
116  }
117 
118  /// AndIterator owns its children and ensures that all of them point to the
119  /// same element. As soon as one child gets exhausted, AndIterator can no
120  /// longer advance and has reached its end.
121  std::vector<std::unique_ptr<Iterator>> Children;
122  /// Indicates whether any child is exhausted. It is cheaper to maintain and
123  /// update the field, rather than traversing the whole subtree in each
124  /// reachedEnd() call.
125  bool ReachedEnd = false;
126  friend Corpus; // For optimizations.
127 };
128 
129 /// Implements Iterator over the union of other iterators.
130 ///
131 /// OrIterator iterates through all items which can be pointed to by at least
132 /// one child. To preserve the sorted order, this iterator always advances the
133 /// child with smallest Child->peek() value. OrIterator becomes exhausted as
134 /// soon as all of its children are exhausted.
135 class OrIterator : public Iterator {
136 public:
137  explicit OrIterator(std::vector<std::unique_ptr<Iterator>> AllChildren)
138  : Iterator(Kind::Or), Children(std::move(AllChildren)) {
139  assert(!Children.empty() && "OR iterator should have at least one child.");
140  }
141 
142  /// Returns true if all children are exhausted.
143  bool reachedEnd() const override {
144  for (const auto &Child : Children)
145  if (!Child->reachedEnd())
146  return false;
147  return true;
148  }
149 
150  /// Moves each child pointing to the smallest DocID to the next item.
151  void advance() override {
152  assert(!reachedEnd() && "OR iterator can't advance() at the end.");
153  const auto SmallestID = peek();
154  for (const auto &Child : Children)
155  if (!Child->reachedEnd() && Child->peek() == SmallestID)
156  Child->advance();
157  }
158 
159  /// Advances each child to the next existing element with DocumentID >= ID.
160  void advanceTo(DocID ID) override {
161  assert(!reachedEnd() && "OR iterator can't advanceTo() at the end.");
162  for (const auto &Child : Children)
163  if (!Child->reachedEnd())
164  Child->advanceTo(ID);
165  }
166 
167  /// Returns the element under cursor of the child with smallest Child->peek()
168  /// value.
169  DocID peek() const override {
170  assert(!reachedEnd() && "OR iterator can't peek() at the end.");
171  DocID Result = std::numeric_limits<DocID>::max();
172 
173  for (const auto &Child : Children)
174  if (!Child->reachedEnd())
175  Result = std::min(Result, Child->peek());
176 
177  return Result;
178  }
179 
180  // Returns the maximum boosting score among all Children when iterator
181  // points to the current ID.
182  float consume() override {
183  assert(!reachedEnd() && "OR iterator can't consume() at the end.");
184  const DocID ID = peek();
185  float Boost = 1;
186  for (const auto &Child : Children)
187  if (!Child->reachedEnd() && Child->peek() == ID)
188  Boost = std::max(Boost, Child->consume());
189  return Boost;
190  }
191 
192  size_t estimateSize() const override {
193  size_t Size = 0;
194  for (const auto &Child : Children)
195  Size = std::max(Size, Child->estimateSize());
196  return Size;
197  }
198 
199 private:
200  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
201  OS << "(| ";
202  auto Separator = "";
203  for (const auto &Child : Children) {
204  OS << Separator << *Child;
205  Separator = " ";
206  }
207  OS << ')';
208  return OS;
209  }
210 
211  // FIXME(kbobyrev): Would storing Children in min-heap be faster?
212  std::vector<std::unique_ptr<Iterator>> Children;
213  friend Corpus; // For optimizations.
214 };
215 
216 /// TrueIterator handles PostingLists which contain all items of the index. It
217 /// stores size of the virtual posting list, and all operations are performed
218 /// in O(1).
219 class TrueIterator : public Iterator {
220 public:
221  explicit TrueIterator(DocID Size) : Iterator(Kind::True), Size(Size) {}
222 
223  bool reachedEnd() const override { return Index >= Size; }
224 
225  void advance() override {
226  assert(!reachedEnd() && "TRUE iterator can't advance() at the end.");
227  ++Index;
228  }
229 
230  void advanceTo(DocID ID) override {
231  assert(!reachedEnd() && "TRUE iterator can't advanceTo() at the end.");
232  Index = std::min(ID, Size);
233  }
234 
235  DocID peek() const override {
236  assert(!reachedEnd() && "TRUE iterator can't peek() at the end.");
237  return Index;
238  }
239 
240  float consume() override {
241  assert(!reachedEnd() && "TRUE iterator can't consume() at the end.");
242  return 1;
243  }
244 
245  size_t estimateSize() const override { return Size; }
246 
247 private:
248  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
249  return OS << "true";
250  }
251 
252  DocID Index = 0;
253  /// Size of the underlying virtual PostingList.
254  DocID Size;
255 };
256 
257 /// FalseIterator yields no results.
258 class FalseIterator : public Iterator {
259 public:
260  FalseIterator() : Iterator(Kind::False) {}
261  bool reachedEnd() const override { return true; }
262  void advance() override { assert(false); }
263  void advanceTo(DocID ID) override { assert(false); }
264  DocID peek() const override {
265  assert(false);
266  return 0;
267  }
268  float consume() override {
269  assert(false);
270  return 1;
271  }
272  size_t estimateSize() const override { return 0; }
273 
274 private:
275  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
276  return OS << "false";
277  }
278 };
279 
280 /// Boost iterator is a wrapper around its child which multiplies scores of
281 /// each retrieved item by a given factor.
282 class BoostIterator : public Iterator {
283 public:
284  BoostIterator(std::unique_ptr<Iterator> Child, float Factor)
285  : Child(std::move(Child)), Factor(Factor) {}
286 
287  bool reachedEnd() const override { return Child->reachedEnd(); }
288 
289  void advance() override { Child->advance(); }
290 
291  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
292 
293  DocID peek() const override { return Child->peek(); }
294 
295  float consume() override { return Child->consume() * Factor; }
296 
297  size_t estimateSize() const override { return Child->estimateSize(); }
298 
299 private:
300  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
301  return OS << "(* " << Factor << ' ' << *Child << ')';
302  }
303 
304  std::unique_ptr<Iterator> Child;
305  float Factor;
306 };
307 
308 /// This iterator limits the number of items retrieved from the child iterator
309 /// on top of the query tree. To ensure that query tree with LIMIT iterators
310 /// inside works correctly, users have to call Root->consume(Root->peek()) each
311 /// time item is retrieved at the root of query tree.
312 class LimitIterator : public Iterator {
313 public:
314  LimitIterator(std::unique_ptr<Iterator> Child, size_t Limit)
315  : Child(std::move(Child)), Limit(Limit), ItemsLeft(Limit) {}
316 
317  bool reachedEnd() const override {
318  return ItemsLeft == 0 || Child->reachedEnd();
319  }
320 
321  void advance() override { Child->advance(); }
322 
323  void advanceTo(DocID ID) override { Child->advanceTo(ID); }
324 
325  DocID peek() const override { return Child->peek(); }
326 
327  /// Decreases the limit in case the element consumed at top of the query tree
328  /// comes from the underlying iterator.
329  float consume() override {
330  assert(!reachedEnd() && "LimitIterator can't consume() at the end.");
331  --ItemsLeft;
332  return Child->consume();
333  }
334 
335  size_t estimateSize() const override {
336  return std::min(Child->estimateSize(), Limit);
337  }
338 
339 private:
340  llvm::raw_ostream &dump(llvm::raw_ostream &OS) const override {
341  return OS << "(LIMIT " << Limit << " " << *Child << ')';
342  }
343 
344  std::unique_ptr<Iterator> Child;
345  size_t Limit;
346  size_t ItemsLeft;
347 };
348 
349 } // end namespace
350 
351 std::vector<std::pair<DocID, float>> consume(Iterator &It) {
352  std::vector<std::pair<DocID, float>> Result;
353  for (; !It.reachedEnd(); It.advance())
354  Result.emplace_back(It.peek(), It.consume());
355  return Result;
356 }
357 
358 std::unique_ptr<Iterator>
359 Corpus::intersect(std::vector<std::unique_ptr<Iterator>> Children) const {
360  std::vector<std::unique_ptr<Iterator>> RealChildren;
361  for (auto &Child : Children) {
362  switch (Child->kind()) {
364  break; // No effect, drop the iterator.
366  return std::move(Child); // Intersection is empty.
367  case Iterator::Kind::And: {
368  // Inline nested AND into parent AND.
369  auto &NewChildren = static_cast<AndIterator *>(Child.get())->Children;
370  std::move(NewChildren.begin(), NewChildren.end(),
371  std::back_inserter(RealChildren));
372  break;
373  }
374  default:
375  RealChildren.push_back(std::move(Child));
376  }
377  }
378  switch (RealChildren.size()) {
379  case 0:
380  return all();
381  case 1:
382  return std::move(RealChildren.front());
383  default:
384  return llvm::make_unique<AndIterator>(std::move(RealChildren));
385  }
386 }
387 
388 std::unique_ptr<Iterator>
389 Corpus::unionOf(std::vector<std::unique_ptr<Iterator>> Children) const {
390  std::vector<std::unique_ptr<Iterator>> RealChildren;
391  for (auto &Child : Children) {
392  switch (Child->kind()) {
394  break; // No effect, drop the iterator.
395  case Iterator::Kind::Or: {
396  // Inline nested OR into parent OR.
397  auto &NewChildren = static_cast<OrIterator *>(Child.get())->Children;
398  std::move(NewChildren.begin(), NewChildren.end(),
399  std::back_inserter(RealChildren));
400  break;
401  }
403  // Don't return all(), which would discard sibling boosts.
404  default:
405  RealChildren.push_back(std::move(Child));
406  }
407  }
408  switch (RealChildren.size()) {
409  case 0:
410  return none();
411  case 1:
412  return std::move(RealChildren.front());
413  default:
414  return llvm::make_unique<OrIterator>(std::move(RealChildren));
415  }
416 }
417 
418 std::unique_ptr<Iterator> Corpus::all() const {
419  return llvm::make_unique<TrueIterator>(Size);
420 }
421 
422 std::unique_ptr<Iterator> Corpus::none() const {
423  return llvm::make_unique<FalseIterator>();
424 }
425 
426 std::unique_ptr<Iterator> Corpus::boost(std::unique_ptr<Iterator> Child,
427  float Factor) const {
428  if (Factor == 1)
429  return Child;
430  if (Child->kind() == Iterator::Kind::False)
431  return Child;
432  return llvm::make_unique<BoostIterator>(std::move(Child), Factor);
433 }
434 
435 std::unique_ptr<Iterator> Corpus::limit(std::unique_ptr<Iterator> Child,
436  size_t Limit) const {
437  if (Child->kind() == Iterator::Kind::False)
438  return Child;
439  return llvm::make_unique<LimitIterator>(std::move(Child), Limit);
440 }
441 
442 } // namespace dex
443 } // namespace clangd
444 } // namespace clang
std::unique_ptr< Iterator > intersect(std::vector< std::unique_ptr< Iterator >> Children) const
Returns AND Iterator which performs the intersection of the PostingLists of its children.
Definition: Iterator.cpp:359
virtual float consume()=0
Informs the iterator that the current document was consumed, and returns its boost.
Iterator is the interface for Query Tree node.
Definition: Iterator.h:55
BindArgumentKind Kind
virtual DocID peek() const =0
Returns the current element this iterator points to.
std::vector< std::pair< DocID, float > > consume(Iterator &It)
Advances the iterator until it is exhausted.
Definition: Iterator.cpp:351
std::unique_ptr< Iterator > unionOf(std::vector< std::unique_ptr< Iterator >> Children) const
Returns OR Iterator which performs the union of the PostingLists of its children. ...
Definition: Iterator.cpp:389
std::unique_ptr< Iterator > limit(std::unique_ptr< Iterator > Child, size_t Limit) const
Returns LIMIT iterator, which yields up to N elements of its child iterator.
Definition: Iterator.cpp:435
std::unique_ptr< Iterator > none() const
Returns FALSE Iterator which iterates over no documents.
Definition: Iterator.cpp:422
uint32_t DocID
Symbol position in the list of all index symbols sorted by a pre-computed symbol quality.
Definition: Iterator.h:47
virtual void advance()=0
Moves to next valid DocID.
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
virtual bool reachedEnd() const =0
Returns true if all valid DocIDs were processed and hence the iterator is exhausted.
Symbol index queries consist of specific requirements for the requested symbol, such as high fuzzy ma...
std::unique_ptr< Iterator > boost(std::unique_ptr< Iterator > Child, float Factor) const
Returns BOOST iterator which multiplies the score of each item by given factor.
Definition: Iterator.cpp:426
std::unique_ptr< Iterator > all() const
Returns TRUE Iterator which iterates over "virtual" PostingList containing all items in range [0...
Definition: Iterator.cpp:418
const SymbolIndex * Index
Definition: Dexp.cpp:85