clang-tools  8.0.0
URI.cpp
Go to the documentation of this file.
1 //===---- URI.cpp - File URIs with schemes -----------------------*- C++-*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 
10 #include "URI.h"
11 #include "llvm/ADT/StringExtras.h"
12 #include "llvm/ADT/Twine.h"
13 #include "llvm/Support/Error.h"
14 #include "llvm/Support/Format.h"
15 #include "llvm/Support/FormatVariadic.h"
16 #include "llvm/Support/Path.h"
17 #include <algorithm>
18 #include <iomanip>
19 #include <sstream>
20 
21 LLVM_INSTANTIATE_REGISTRY(clang::clangd::URISchemeRegistry)
22 
23 namespace clang {
24 namespace clangd {
25 namespace {
26 
27 inline llvm::Error make_string_error(const llvm::Twine &Message) {
28  return llvm::make_error<llvm::StringError>(Message,
29  llvm::inconvertibleErrorCode());
30 }
31 
32 /// \brief This manages file paths in the file system. All paths in the scheme
33 /// are absolute (with leading '/').
34 /// Note that this scheme is hardcoded into the library and not registered in
35 /// registry.
36 class FileSystemScheme : public URIScheme {
37 public:
38  llvm::Expected<std::string>
39  getAbsolutePath(llvm::StringRef /*Authority*/, llvm::StringRef Body,
40  llvm::StringRef /*HintPath*/) const override {
41  if (!Body.startswith("/"))
42  return make_string_error("File scheme: expect body to be an absolute "
43  "path starting with '/': " +
44  Body);
45  // For Windows paths e.g. /X:
46  if (Body.size() > 2 && Body[0] == '/' && Body[2] == ':')
47  Body.consume_front("/");
48  llvm::SmallVector<char, 16> Path(Body.begin(), Body.end());
49  llvm::sys::path::native(Path);
50  return std::string(Path.begin(), Path.end());
51  }
52 
53  llvm::Expected<URI>
54  uriFromAbsolutePath(llvm::StringRef AbsolutePath) const override {
55  std::string Body;
56  // For Windows paths e.g. X:
57  if (AbsolutePath.size() > 1 && AbsolutePath[1] == ':')
58  Body = "/";
59  Body += llvm::sys::path::convert_to_slash(AbsolutePath);
60  return URI("file", /*Authority=*/"", Body);
61  }
62 };
63 
64 llvm::Expected<std::unique_ptr<URIScheme>>
65 findSchemeByName(llvm::StringRef Scheme) {
66  if (Scheme == "file")
67  return llvm::make_unique<FileSystemScheme>();
68 
69  for (auto I = URISchemeRegistry::begin(), E = URISchemeRegistry::end();
70  I != E; ++I) {
71  if (I->getName() != Scheme)
72  continue;
73  return I->instantiate();
74  }
75  return make_string_error("Can't find scheme: " + Scheme);
76 }
77 
/// Returns true if \p C has to be percent-encoded when written into a URI.
///
/// ASCII letters and digits are never escaped, nor are '-', '_', '.', '~',
/// '/' and ':'. Every other byte value is escaped.
bool shouldEscape(unsigned char C) {
  // Unreserved alphanumerics.
  const bool IsLower = 'a' <= C && C <= 'z';
  const bool IsUpper = 'A' <= C && C <= 'Z';
  const bool IsDigit = '0' <= C && C <= '9';
  // '/' is only reserved when parsing. ':' is only reserved for relative URI
  // paths, which clangd doesn't produce.
  const bool IsPlainPunctuation = C == '-' || C == '_' || C == '.' ||
                                  C == '~' || C == '/' || C == ':';
  return !(IsLower || IsUpper || IsDigit || IsPlainPunctuation);
}
95 
96 /// Encodes a string according to percent-encoding.
97 /// - Unreserved characters are not escaped.
98 /// - Reserved characters always escaped with exceptions like '/'.
99 /// - All other characters are escaped.
100 std::string percentEncode(llvm::StringRef Content) {
101  std::string Result;
102  llvm::raw_string_ostream OS(Result);
103  for (unsigned char C : Content)
104  if (shouldEscape(C))
105  OS << '%' << llvm::format_hex_no_prefix(C, 2, /*Upper = */ true);
106  else
107  OS << C;
108 
109  OS.flush();
110  return Result;
111 }
112 
113 /// Decodes a string according to percent-encoding.
114 std::string percentDecode(llvm::StringRef Content) {
115  std::string Result;
116  for (auto I = Content.begin(), E = Content.end(); I != E; ++I) {
117  if (*I != '%') {
118  Result += *I;
119  continue;
120  }
121  if (*I == '%' && I + 2 < Content.end() && llvm::isHexDigit(*(I + 1)) &&
122  llvm::isHexDigit(*(I + 2))) {
123  Result.push_back(llvm::hexFromNibbles(*(I + 1), *(I + 2)));
124  I += 2;
125  } else
126  Result.push_back(*I);
127  }
128  return Result;
129 }
130 
131 bool isValidScheme(llvm::StringRef Scheme) {
132  if (Scheme.empty())
133  return false;
134  if (!llvm::isAlpha(Scheme[0]))
135  return false;
136  return std::all_of(Scheme.begin() + 1, Scheme.end(), [](char C) {
137  return llvm::isAlnum(C) || C == '+' || C == '.' || C == '-';
138  });
139 }
140 
141 } // namespace
142 
143 URI::URI(llvm::StringRef Scheme, llvm::StringRef Authority,
144  llvm::StringRef Body)
145  : Scheme(Scheme), Authority(Authority), Body(Body) {
146  assert(!Scheme.empty());
147  assert((Authority.empty() || Body.startswith("/")) &&
148  "URI body must start with '/' when authority is present.");
149 }
150 
151 std::string URI::toString() const {
152  std::string Result;
153  llvm::raw_string_ostream OS(Result);
154  OS << percentEncode(Scheme) << ":";
155  if (Authority.empty() && Body.empty())
156  return OS.str();
157  // If authority if empty, we only print body if it starts with "/"; otherwise,
158  // the URI is invalid.
159  if (!Authority.empty() || llvm::StringRef(Body).startswith("/"))
160  OS << "//" << percentEncode(Authority);
161  OS << percentEncode(Body);
162  OS.flush();
163  return Result;
164 }
165 
166 llvm::Expected<URI> URI::parse(llvm::StringRef OrigUri) {
167  URI U;
168  llvm::StringRef Uri = OrigUri;
169 
170  auto Pos = Uri.find(':');
171  if (Pos == llvm::StringRef::npos)
172  return make_string_error("Scheme must be provided in URI: " + OrigUri);
173  auto SchemeStr = Uri.substr(0, Pos);
174  U.Scheme = percentDecode(SchemeStr);
175  if (!isValidScheme(U.Scheme))
176  return make_string_error(llvm::formatv("Invalid scheme: {0} (decoded: {1})",
177  SchemeStr, U.Scheme));
178  Uri = Uri.substr(Pos + 1);
179  if (Uri.consume_front("//")) {
180  Pos = Uri.find('/');
181  U.Authority = percentDecode(Uri.substr(0, Pos));
182  Uri = Uri.substr(Pos);
183  }
184  U.Body = percentDecode(Uri);
185  return U;
186 }
187 
188 llvm::Expected<URI> URI::create(llvm::StringRef AbsolutePath,
189  llvm::StringRef Scheme) {
190  if (!llvm::sys::path::is_absolute(AbsolutePath))
191  return make_string_error("Not a valid absolute path: " + AbsolutePath);
192  auto S = findSchemeByName(Scheme);
193  if (!S)
194  return S.takeError();
195  return S->get()->uriFromAbsolutePath(AbsolutePath);
196 }
197 
198 URI URI::create(llvm::StringRef AbsolutePath) {
199  if (!llvm::sys::path::is_absolute(AbsolutePath))
200  llvm_unreachable(
201  ("Not a valid absolute path: " + AbsolutePath).str().c_str());
202  for (auto &Entry : URISchemeRegistry::entries()) {
203  auto URI = Entry.instantiate()->uriFromAbsolutePath(AbsolutePath);
204  // For some paths, conversion to different URI schemes is impossible. These
205  // should be just skipped.
206  if (!URI) {
207  // Ignore the error.
208  llvm::consumeError(URI.takeError());
209  continue;
210  }
211  return std::move(*URI);
212  }
213  // Fallback to file: scheme which should work for any paths.
214  return URI::createFile(AbsolutePath);
215 }
216 
217 URI URI::createFile(llvm::StringRef AbsolutePath) {
218  auto U = FileSystemScheme().uriFromAbsolutePath(AbsolutePath);
219  if (!U)
220  llvm_unreachable(llvm::toString(U.takeError()).c_str());
221  return std::move(*U);
222 }
223 
224 llvm::Expected<std::string> URI::resolve(const URI &Uri,
225  llvm::StringRef HintPath) {
226  auto S = findSchemeByName(Uri.Scheme);
227  if (!S)
228  return S.takeError();
229  return S->get()->getAbsolutePath(Uri.Authority, Uri.Body, HintPath);
230 }
231 
232 llvm::Expected<std::string> URI::resolvePath(llvm::StringRef AbsPath,
233  llvm::StringRef HintPath) {
234  if (!llvm::sys::path::is_absolute(AbsPath))
235  llvm_unreachable(("Not a valid absolute path: " + AbsPath).str().c_str());
236  for (auto &Entry : URISchemeRegistry::entries()) {
237  auto S = Entry.instantiate();
238  auto U = S->uriFromAbsolutePath(AbsPath);
239  // For some paths, conversion to different URI schemes is impossible. These
240  // should be just skipped.
241  if (!U) {
242  // Ignore the error.
243  llvm::consumeError(U.takeError());
244  continue;
245  }
246  return S->getAbsolutePath(U->Authority, U->Body, HintPath);
247  }
248  // Fallback to file: scheme which doesn't do any canonicalization.
249  return AbsPath;
250 }
251 
252 llvm::Expected<std::string> URI::includeSpelling(const URI &Uri) {
253  auto S = findSchemeByName(Uri.Scheme);
254  if (!S)
255  return S.takeError();
256  return S->get()->getIncludeSpelling(Uri);
257 }
258 
259 } // namespace clangd
260 } // namespace clang
static llvm::StringRef toString(SpecialMemberFunctionsCheck::SpecialMemberFunctionKind K)
static URI createFile(llvm::StringRef AbsolutePath)
This creates a file:// URI for AbsolutePath. The path must be absolute.
Definition: URI.cpp:217
llvm::Registry< URIScheme > URISchemeRegistry
By default, a "file" scheme is supported where URI paths are always absolute in the file system...
Definition: URI.h:128
static llvm::Expected< std::string > resolvePath(llvm::StringRef AbsPath, llvm::StringRef HintPath="")
Resolves AbsPath into a canonical path of its URI, by converting AbsPath to URI and resolving the URI...
Definition: URI.cpp:232
std::string Path
A typedef to represent a file path.
Definition: Path.h:21
llvm::Optional< llvm::Expected< tooling::AtomicChanges > > Result
Position Pos
static llvm::Expected< URI > create(llvm::StringRef AbsolutePath, llvm::StringRef Scheme)
Creates a URI for a file in the given scheme.
Definition: URI.cpp:188
===– Representation.cpp - ClangDoc Representation --------—*- C++ -*-===//
A URI describes the location of a source file.
Definition: URI.h:29
static llvm::Expected< std::string > resolve(const URI &U, llvm::StringRef HintPath="")
Resolves the absolute path of U.
Definition: URI.cpp:224
static llvm::Expected< std::string > includeSpelling(const URI &U)
Gets the preferred spelling of this file for #include, if there is one, e.g.
Definition: URI.cpp:252
static llvm::Expected< URI > parse(llvm::StringRef Uri)
Parse a URI string "<scheme>:[//<authority>/]<path>".
Definition: URI.cpp:166
std::string toString() const
Returns a string URI with all components percent-encoded.
Definition: URI.cpp:151