clang-tools  8.0.0
Dexp.cpp
Go to the documentation of this file.
1 //===--- Dexp.cpp - Dex EXPloration tool ------------------------*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file implements a simple interactive tool which can be used to manually
11 // evaluate symbol search quality of Clangd index.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "SourceCode.h"
16 #include "index/Serialization.h"
17 #include "index/dex/Dex.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringRef.h"
20 #include "llvm/ADT/StringSwitch.h"
21 #include "llvm/LineEditor/LineEditor.h"
22 #include "llvm/Support/CommandLine.h"
23 #include "llvm/Support/Signals.h"
24 
25 namespace clang {
26 namespace clangd {
27 namespace {
28 
29 llvm::cl::opt<std::string> IndexPath("index-path",
30  llvm::cl::desc("Path to the index"),
31  llvm::cl::Positional, llvm::cl::Required);
32 
// Overview text handed to the command-line parser in main().
static const std::string Overview = R"(
This is an **experimental** interactive tool to process user-provided search
queries over given symbol collection obtained via clangd-indexer. The
tool can be used to evaluate search quality of existing index implementations
and manually construct non-trivial test cases.

Use the "help" command to get information about the details.
)";
41 
42 void reportTime(llvm::StringRef Name, llvm::function_ref<void()> F) {
43  const auto TimerStart = std::chrono::high_resolution_clock::now();
44  F();
45  const auto TimerStop = std::chrono::high_resolution_clock::now();
46  const auto Duration = std::chrono::duration_cast<std::chrono::milliseconds>(
47  TimerStop - TimerStart);
48  llvm::outs() << llvm::formatv("{0} took {1:ms+n}.\n", Name, Duration);
49 }
50 
51 std::vector<SymbolID> getSymbolIDsFromIndex(llvm::StringRef QualifiedName,
52  const SymbolIndex *Index) {
53  FuzzyFindRequest Request;
54  // Remove leading "::" qualifier as FuzzyFind doesn't need leading "::"
55  // qualifier for global scope.
56  bool IsGlobalScope = QualifiedName.consume_front("::");
57  auto Names = splitQualifiedName(QualifiedName);
58  if (IsGlobalScope || !Names.first.empty())
59  Request.Scopes = {Names.first};
60  else
61  // QualifiedName refers to a symbol in global scope (e.g. "GlobalSymbol"),
62  // add the global scope to the request.
63  Request.Scopes = {""};
64 
65  Request.Query = Names.second;
66  std::vector<SymbolID> SymIDs;
67  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
68  std::string SymQualifiedName = (Sym.Scope + Sym.Name).str();
69  if (QualifiedName == SymQualifiedName)
70  SymIDs.push_back(Sym.ID);
71  });
72  return SymIDs;
73 }
74 
75 // REPL commands inherit from Command and contain their options as members.
76 // Creating a Command populates parser options, parseAndRun() resets them.
77 class Command {
78  // By resetting the parser options, we lost the standard -help flag.
79  llvm::cl::opt<bool, false, llvm::cl::parser<bool>> Help{
80  "help", llvm::cl::desc("Display available options"),
81  llvm::cl::ValueDisallowed, llvm::cl::cat(llvm::cl::GeneralCategory)};
82  virtual void run() = 0;
83 
84 protected:
85  const SymbolIndex *Index;
86 
87 public:
88  virtual ~Command() = default;
89  virtual void parseAndRun(llvm::ArrayRef<const char *> Argv,
90  const char *Overview, const SymbolIndex &Index) {
91  std::string ParseErrs;
92  llvm::raw_string_ostream OS(ParseErrs);
93  bool Ok = llvm::cl::ParseCommandLineOptions(Argv.size(), Argv.data(),
94  Overview, &OS);
95  if (Help.getNumOccurrences() > 0) {
96  // Avoid printing parse errors in this case.
97  // (Well, in theory. A bunch get printed to llvm::errs() regardless!)
98  llvm::cl::PrintHelpMessage();
99  } else {
100  llvm::outs() << OS.str();
101  if (Ok) {
102  this->Index = &Index;
103  reportTime(Argv[0], [&] { run(); });
104  }
105  }
106  llvm::cl::ResetCommandLineParser(); // must do this before opts are
107  // destroyed.
108  }
109 };
110 
111 // FIXME(kbobyrev): Ideas for more commands:
112 // * load/swap/reload index: this would make it possible to get rid of llvm::cl
113 // usages in the tool driver and actually use llvm::cl library in the REPL.
114 // * show posting list density histogram (or dump data somewhere so that the
115 // user could build one)
116 // * show number of tokens of each kind
117 // * print out tokens with the most dense posting lists
118 // * print out tokens with least dense posting lists
119 
120 class FuzzyFind : public Command {
121  llvm::cl::opt<std::string> Query{
122  "query",
123  llvm::cl::Positional,
124  llvm::cl::Required,
125  llvm::cl::desc("Query string to be fuzzy-matched"),
126  };
127  llvm::cl::opt<std::string> Scopes{
128  "scopes",
129  llvm::cl::desc("Allowed symbol scopes (comma-separated list)"),
130  };
131  llvm::cl::opt<unsigned> Limit{
132  "limit",
133  llvm::cl::init(10),
134  llvm::cl::desc("Max results to display"),
135  };
136 
137  void run() override {
138  FuzzyFindRequest Request;
139  Request.Limit = Limit;
140  Request.Query = Query;
141  if (Scopes.getNumOccurrences() > 0) {
142  llvm::SmallVector<llvm::StringRef, 8> Scopes;
143  llvm::StringRef(this->Scopes).split(Scopes, ',');
144  Request.Scopes = {Scopes.begin(), Scopes.end()};
145  }
146  Request.AnyScope = Request.Scopes.empty();
147  // FIXME(kbobyrev): Print symbol final scores to see the distribution.
148  static const auto OutputFormat = "{0,-4} | {1,-40} | {2,-25}\n";
149  llvm::outs() << llvm::formatv(OutputFormat, "Rank", "Symbol ID",
150  "Symbol Name");
151  size_t Rank = 0;
152  Index->fuzzyFind(Request, [&](const Symbol &Sym) {
153  llvm::outs() << llvm::formatv(OutputFormat, Rank++, Sym.ID.str(),
154  Sym.Scope + Sym.Name);
155  });
156  }
157 };
158 
159 class Lookup : public Command {
160  llvm::cl::opt<std::string> ID{
161  "id",
162  llvm::cl::Positional,
163  llvm::cl::desc("Symbol ID to look up (hex)"),
164  };
165  llvm::cl::opt<std::string> Name{
166  "name",
167  llvm::cl::desc("Qualified name to look up."),
168  };
169 
170  void run() override {
171  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
172  llvm::outs()
173  << "Missing required argument: please provide id or -name.\n";
174  return;
175  }
176  std::vector<SymbolID> IDs;
177  if (ID.getNumOccurrences()) {
178  auto SID = SymbolID::fromStr(ID);
179  if (!SID) {
180  llvm::outs() << llvm::toString(SID.takeError()) << "\n";
181  return;
182  }
183  IDs.push_back(*SID);
184  } else {
185  IDs = getSymbolIDsFromIndex(Name, Index);
186  }
187 
188  LookupRequest Request;
189  Request.IDs.insert(IDs.begin(), IDs.end());
190  bool FoundSymbol = false;
191  Index->lookup(Request, [&](const Symbol &Sym) {
192  FoundSymbol = true;
193  llvm::outs() << toYAML(Sym);
194  });
195  if (!FoundSymbol)
196  llvm::outs() << "not found\n";
197  }
198 };
199 
200 class Refs : public Command {
201  llvm::cl::opt<std::string> ID{
202  "id",
203  llvm::cl::Positional,
204  llvm::cl::desc("Symbol ID of the symbol being queried (hex)."),
205  };
206  llvm::cl::opt<std::string> Name{
207  "name",
208  llvm::cl::desc("Qualified name of the symbol being queried."),
209  };
210  llvm::cl::opt<std::string> Filter{
211  "filter",
212  llvm::cl::init(".*"),
213  llvm::cl::desc(
214  "Print all results from files matching this regular expression."),
215  };
216 
217  void run() override {
218  if (ID.getNumOccurrences() == 0 && Name.getNumOccurrences() == 0) {
219  llvm::outs()
220  << "Missing required argument: please provide id or -name.\n";
221  return;
222  }
223  std::vector<SymbolID> IDs;
224  if (ID.getNumOccurrences()) {
225  auto SID = SymbolID::fromStr(ID);
226  if (!SID) {
227  llvm::outs() << llvm::toString(SID.takeError()) << "\n";
228  return;
229  }
230  IDs.push_back(*SID);
231  } else {
232  IDs = getSymbolIDsFromIndex(Name, Index);
233  if (IDs.size() > 1) {
234  llvm::outs() << llvm::formatv(
235  "The name {0} is ambiguous, found {1} different "
236  "symbols. Please use id flag to disambiguate.\n",
237  Name, IDs.size());
238  return;
239  }
240  }
241  RefsRequest RefRequest;
242  RefRequest.IDs.insert(IDs.begin(), IDs.end());
243  llvm::Regex RegexFilter(Filter);
244  Index->refs(RefRequest, [&RegexFilter](const Ref &R) {
245  auto U = URI::parse(R.Location.FileURI);
246  if (!U) {
247  llvm::outs() << U.takeError();
248  return;
249  }
250  if (RegexFilter.match(U->body()))
251  llvm::outs() << R << "\n";
252  });
253  }
254 };
255 
256 struct {
257  const char *Name;
258  const char *Description;
259  std::function<std::unique_ptr<Command>()> Implementation;
260 } CommandInfo[] = {
261  {"find", "Search for symbols with fuzzyFind", llvm::make_unique<FuzzyFind>},
262  {"lookup", "Dump symbol details by ID or qualified name",
263  llvm::make_unique<Lookup>},
264  {"refs", "Find references by ID or qualified name",
265  llvm::make_unique<Refs>},
266 };
267 
268 std::unique_ptr<SymbolIndex> openIndex(llvm::StringRef Index) {
269  return loadIndex(Index, /*UseDex=*/true);
270 }
271 
272 } // namespace
273 } // namespace clangd
274 } // namespace clang
275 
276 int main(int argc, const char *argv[]) {
277  using namespace clang::clangd;
278 
279  llvm::cl::ParseCommandLineOptions(argc, argv, Overview);
280  llvm::cl::ResetCommandLineParser(); // We reuse it for REPL commands.
281  llvm::sys::PrintStackTraceOnErrorSignal(argv[0]);
282 
283  std::unique_ptr<SymbolIndex> Index;
284  reportTime("Dex build", [&]() {
285  Index = openIndex(IndexPath);
286  });
287 
288  if (!Index) {
289  llvm::outs() << "Failed to open the index.\n";
290  return -1;
291  }
292 
293  llvm::LineEditor LE("dexp");
294 
295  while (llvm::Optional<std::string> Request = LE.readLine()) {
296  // Split on spaces and add required null-termination.
297  std::replace(Request->begin(), Request->end(), ' ', '\0');
298  llvm::SmallVector<llvm::StringRef, 8> Args;
299  llvm::StringRef(*Request).split(Args, '\0', /*MaxSplit=*/-1,
300  /*KeepEmpty=*/false);
301  if (Args.empty())
302  continue;
303  if (Args.front() == "help") {
304  llvm::outs() << "dexp - Index explorer\nCommands:\n";
305  for (const auto &C : CommandInfo)
306  llvm::outs() << llvm::formatv("{0,16} - {1}\n", C.Name, C.Description);
307  llvm::outs() << "Get detailed command help with e.g. `find -help`.\n";
308  continue;
309  }
310  llvm::SmallVector<const char *, 8> FakeArgv;
311  for (llvm::StringRef S : Args)
312  FakeArgv.push_back(S.data()); // Terminated by separator or end of string.
313 
314  bool Recognized = false;
315  for (const auto &Cmd : CommandInfo) {
316  if (Cmd.Name == Args.front()) {
317  Recognized = true;
318  Cmd.Implementation()->parseAndRun(FakeArgv, Cmd.Description, *Index);
319  break;
320  }
321  }
322  if (!Recognized)
323  llvm::outs() << "Unknown command. Try 'help'.\n";
324  }
325 
326  return 0;
327 }
const tooling::CompileCommand & Command
std::function< std::unique_ptr< Command >)> Implementation
Definition: Dexp.cpp:259
std::unique_ptr< SymbolIndex > loadIndex(llvm::StringRef SymbolFilename, bool UseDex)
This defines Dex - a symbol index implementation based on query iterators over symbol tokens...
static llvm::Expected< SymbolID > fromStr(llvm::StringRef)
Definition: SymbolID.cpp:36
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
std::string toYAML(const Symbol &)
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
std::pair< llvm::StringRef, llvm::StringRef > splitQualifiedName(llvm::StringRef QName)
From "a::b::c", return {"a::b::", "c"}.
Definition: SourceCode.cpp:164
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:166
const char * Description
Definition: Dexp.cpp:258
int main(int argc, const char *argv[])
Definition: Dexp.cpp:276
const SymbolIndex * Index
Definition: Dexp.cpp:85