20 #include "llvm/ADT/ArrayRef.h" 21 #include "llvm/ADT/None.h" 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/ErrorHandling.h" 26 #include "llvm/Support/FormatVariadic.h" 27 #include "llvm/Support/raw_ostream.h" 35 using namespace clang;
40 : Location(Location), Length(Length), Kind(Kind) {
53 return llvm::StringRef(Start,
length());
57 assert(
location().isFileID() &&
"must be a spelled token");
67 auto F = First.
range(SM);
68 auto L = Last.
range(SM);
69 assert(F.file() == L.file() &&
"tokens from different files");
70 assert((F == L || F.endOffset() <= L.beginOffset()) &&
"wrong order of tokens");
71 return FileRange(F.file(), F.beginOffset(), L.endOffset());
79 : File(File),
Begin(BeginOffset),
End(EndOffset) {
81 assert(BeginOffset <= EndOffset);
107 return OS << llvm::formatv(
"FileRange(file = {0}, offsets = {1}-{2})",
113 bool Invalid =
false;
117 assert(Begin <= Text.size());
118 assert(End <= Text.size());
119 return Text.substr(Begin,
length());
126 llvm::partition_point(expandedTokens(), [&](
const syntax::Token &T) {
130 llvm::partition_point(expandedTokens(), [&](
const syntax::Token &T) {
131 return !SourceMgr->isBeforeInTranslationUnit(R.
getEnd(), T.
location());
144 std::pair<const syntax::Token *, const TokenBuffer::Mapping *>
145 TokenBuffer::spelledForExpandedToken(
const syntax::Token *Expanded)
const {
147 assert(ExpandedTokens.data() <= Expanded &&
148 Expanded < ExpandedTokens.data() + ExpandedTokens.size());
150 auto FileIt = Files.find(
151 SourceMgr->getFileID(SourceMgr->getExpansionLoc(Expanded->
location())));
152 assert(FileIt != Files.end() &&
"no file for an expanded token");
154 const MarkedFile &File = FileIt->second;
156 unsigned ExpandedIndex = Expanded - ExpandedTokens.data();
158 auto It = llvm::partition_point(File.Mappings, [&](
const Mapping &M) {
159 return M.BeginExpanded <= ExpandedIndex;
162 if (It == File.Mappings.begin()) {
164 return {&File.SpelledTokens[ExpandedIndex - File.BeginExpanded],
nullptr};
169 if (ExpandedIndex < It->EndExpanded)
170 return {&File.SpelledTokens[It->BeginSpelled], &*It};
175 &File.SpelledTokens[It->EndSpelled + (ExpandedIndex - It->EndExpanded)],
180 auto It = Files.find(FID);
181 assert(It != Files.end());
182 return It->second.SpelledTokens;
185 std::string TokenBuffer::Mapping::str()
const {
186 return llvm::formatv(
"spelled tokens: [{0},{1}), expanded tokens: [{2},{3})",
187 BeginSpelled, EndSpelled, BeginExpanded, EndExpanded);
194 if (Expanded.empty())
200 const Mapping *BeginMapping;
201 std::tie(BeginSpelled, BeginMapping) =
202 spelledForExpandedToken(&Expanded.front());
205 const Mapping *LastMapping;
206 std::tie(LastSpelled, LastMapping) =
207 spelledForExpandedToken(&Expanded.back());
211 if (FID != SourceMgr->getFileID(LastSpelled->location()))
214 const MarkedFile &File = Files.find(FID)->second;
217 unsigned BeginExpanded = Expanded.begin() - ExpandedTokens.data();
218 unsigned EndExpanded = Expanded.end() - ExpandedTokens.data();
219 if (BeginMapping && BeginMapping->BeginExpanded < BeginExpanded)
221 if (LastMapping && EndExpanded < LastMapping->EndExpanded)
224 return llvm::makeArrayRef(
225 BeginMapping ? File.SpelledTokens.data() + BeginMapping->BeginSpelled
227 LastMapping ? File.SpelledTokens.data() + LastMapping->EndSpelled
235 auto FileIt = Files.find(SourceMgr->getFileID(Spelled->
location()));
236 assert(FileIt != Files.end() &&
"file not tracked by token buffer");
238 auto &File = FileIt->second;
239 assert(File.SpelledTokens.data() <= Spelled &&
240 Spelled < (File.SpelledTokens.data() + File.SpelledTokens.size()));
242 unsigned SpelledIndex = Spelled - File.SpelledTokens.data();
243 auto M = llvm::partition_point(File.Mappings, [&](
const Mapping &M) {
244 return M.BeginSpelled < SpelledIndex;
246 if (M == File.Mappings.end() || M->BeginSpelled != SpelledIndex)
250 E.
Spelled = llvm::makeArrayRef(File.SpelledTokens.data() + M->BeginSpelled,
251 File.SpelledTokens.data() + M->EndSpelled);
252 E.
Expanded = llvm::makeArrayRef(ExpandedTokens.data() + M->BeginExpanded,
253 ExpandedTokens.data() + M->EndExpanded);
263 auto *Right = llvm::partition_point(
265 bool AcceptRight = Right != All.end() && Right->location() <= Loc;
266 bool AcceptLeft = Right != All.begin() && (Right - 1)->endLocation() >= Loc;
267 return llvm::makeArrayRef(Right - (AcceptLeft ? 1 : 0),
268 Right + (AcceptRight ? 1 : 0));
275 if (
Tok.kind() == tok::identifier)
281 std::vector<const syntax::Token *>
283 auto FileIt = Files.find(FID);
284 assert(FileIt != Files.end() &&
"file not tracked by token buffer");
285 auto &File = FileIt->second;
286 std::vector<const syntax::Token *> Expansions;
287 auto &Spelled = File.SpelledTokens;
288 for (
auto Mapping : File.Mappings) {
290 if (Token->
kind() == tok::TokenKind::identifier)
291 Expansions.push_back(Token);
298 std::vector<syntax::Token> Tokens;
302 if (T.getKind() == tok::raw_identifier && !T.needsCleaning() &&
305 T.setIdentifierInfo(&II);
314 while (!L.LexFromRawLexer(T))
342 (LastExpansionEnd.isValid() &&
343 Collector->PP.getSourceManager().isBeforeInTranslationUnit(
344 Range.
getBegin(), LastExpansionEnd)))
347 LastExpansionEnd = Range.
getEnd();
374 DEBUG_WITH_TYPE(
"collect-tokens", llvm::dbgs()
385 auto CB = std::make_unique<CollectPPExpansions>(*this);
386 this->Collector = CB.get();
394 Builder(std::vector<syntax::Token> Expanded, PPExpansions CollectedExpansions,
396 : Result(SM), CollectedExpansions(
std::move(CollectedExpansions)), SM(SM),
398 Result.ExpandedTokens = std::move(Expanded);
402 buildSpelledTokens();
412 assert(!Result.ExpandedTokens.empty());
413 assert(Result.ExpandedTokens.back().kind() ==
tok::eof);
414 for (
unsigned I = 0; I < Result.ExpandedTokens.size() - 1; ++I) {
416 processExpandedToken(I);
421 SM.
getFileID(Result.ExpandedTokens.back().location()));
423 Result.ExpandedTokens.back().location(),
424 Result.ExpandedTokens.size() - 1);
425 Result.Files[
SM.
getMainFileID()].EndExpanded = Result.ExpandedTokens.size();
429 fillGapsAtEndOfFiles();
431 return std::move(Result);
438 void processExpandedToken(
unsigned &I) {
439 auto L = Result.ExpandedTokens[I].location();
446 TokenBuffer::MarkedFile &File = Result.Files[FID];
448 fillGapUntil(File, L, I);
451 assert(File.SpelledTokens[NextSpelled[FID]].location() == L &&
452 "no corresponding token in the spelled stream");
462 void processMacroExpansion(
CharSourceRange SpelledRange,
unsigned &I) {
465 TokenBuffer::MarkedFile &File = Result.Files[FID];
467 fillGapUntil(File, SpelledRange.
getBegin(), I);
470 unsigned BeginExpanded = I;
471 for (; I + 1 < Result.ExpandedTokens.size(); ++I) {
472 auto NextL = Result.ExpandedTokens[I + 1].location();
473 if (!NextL.isMacroID() ||
477 unsigned EndExpanded = I + 1;
479 EndExpanded, NextSpelled[FID]);
484 void buildSpelledTokens() {
485 for (
unsigned I = 0; I < Result.ExpandedTokens.size(); ++I) {
488 auto It = Result.Files.try_emplace(FID);
489 TokenBuffer::MarkedFile &File = It.first->second;
491 File.EndExpanded = I + 1;
496 File.BeginExpanded = I;
497 File.SpelledTokens =
tokenize(FID,
SM, LangOpts);
501 void consumeEmptyMapping(TokenBuffer::MarkedFile &File,
unsigned EndOffset,
502 unsigned ExpandedIndex,
unsigned &SpelledIndex) {
503 consumeMapping(File, EndOffset, ExpandedIndex, ExpandedIndex, SpelledIndex);
509 void consumeMapping(TokenBuffer::MarkedFile &File,
unsigned EndOffset,
510 unsigned BeginExpanded,
unsigned EndExpanded,
511 unsigned &SpelledIndex) {
513 unsigned MappingBegin = SpelledIndex;
517 tryConsumeSpelledUntil(File, EndOffset + 1, SpelledIndex).hasValue();
519 assert(!HitMapping &&
"recursive macro expansion?");
521 TokenBuffer::Mapping M;
522 M.BeginExpanded = BeginExpanded;
523 M.EndExpanded = EndExpanded;
524 M.BeginSpelled = MappingBegin;
525 M.EndSpelled = SpelledIndex;
527 File.Mappings.push_back(M);
533 void fillGapUntil(TokenBuffer::MarkedFile &File,
SourceLocation L,
534 unsigned ExpandedIndex) {
540 unsigned &SpelledIndex = NextSpelled[FID];
541 unsigned MappingBegin = SpelledIndex;
543 auto EndLoc = tryConsumeSpelledUntil(File, Offset, SpelledIndex);
544 if (SpelledIndex != MappingBegin) {
545 TokenBuffer::Mapping M;
546 M.BeginSpelled = MappingBegin;
547 M.EndSpelled = SpelledIndex;
548 M.BeginExpanded = M.EndExpanded = ExpandedIndex;
549 File.Mappings.push_back(M);
556 MappingBegin = SpelledIndex;
567 tryConsumeSpelledUntil(TokenBuffer::MarkedFile &File,
unsigned Offset,
568 unsigned &NextSpelled) {
569 for (; NextSpelled < File.SpelledTokens.size(); ++NextSpelled) {
570 auto L = File.SpelledTokens[NextSpelled].location();
574 if (Mapping != CollectedExpansions.end())
575 return Mapping->second;
581 void fillGapsAtEndOfFiles() {
582 for (
auto &F : Result.Files) {
583 if (F.second.SpelledTokens.empty())
585 fillGapUntil(F.second, F.second.SpelledTokens.back().endLocation(),
586 F.second.EndExpanded);
592 llvm::DenseMap<FileID, unsigned> NextSpelled;
593 PPExpansions CollectedExpansions;
599 PP.setTokenWatcher(
nullptr);
600 Collector->disable();
601 return Builder(std::move(Expanded), std::move(Expansions),
602 PP.getSourceManager(), PP.getLangOpts())
616 auto PrintToken = [
this](
const syntax::Token &T) -> std::string {
619 return T.text(*SourceMgr);
622 auto DumpTokens = [
this, &PrintToken](llvm::raw_ostream &OS,
624 if (Tokens.empty()) {
628 OS << Tokens[0].text(*SourceMgr);
629 for (
unsigned I = 1; I < Tokens.size(); ++I) {
632 OS <<
" " << PrintToken(Tokens[I]);
637 llvm::raw_string_ostream OS(Dump);
639 OS <<
"expanded tokens:\n" 642 DumpTokens(OS, llvm::makeArrayRef(ExpandedTokens).drop_back());
645 std::vector<FileID> Keys;
647 Keys.push_back(F.first);
651 const MarkedFile &File = Files.find(
ID)->second;
652 auto *Entry = SourceMgr->getFileEntryForID(
ID);
655 OS << llvm::formatv(
"file '{0}'\n", Entry->getName())
656 <<
" spelled tokens:\n" 661 if (File.Mappings.empty()) {
662 OS <<
" no mappings.\n";
665 OS <<
" mappings:\n";
666 for (
auto &M : File.Mappings) {
668 " ['{0}'_{1}, '{2}'_{3}) => ['{4}'_{5}, '{6}'_{7})\n",
669 PrintToken(File.SpelledTokens[M.BeginSpelled]), M.BeginSpelled,
670 M.EndSpelled == File.SpelledTokens.size()
672 : PrintToken(File.SpelledTokens[M.EndSpelled]),
673 M.EndSpelled, PrintToken(ExpandedTokens[M.BeginExpanded]),
674 M.BeginExpanded, PrintToken(ExpandedTokens[M.EndExpanded]),
llvm::StringRef text(const SourceManager &SM) const
Gets the substring that this FileRange refers to.
const SourceManager & sourceManager() const
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
llvm::ArrayRef< syntax::Token > spelledTokensTouching(SourceLocation Loc, const syntax::TokenBuffer &Tokens)
The spelled tokens that overlap or touch a spelling location Loc.
Defines the SourceManager interface.
llvm::raw_ostream & operator<<(llvm::raw_ostream &OS, NodeKind K)
For debugging purposes.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
A description of the current definition of a macro.
std::string str() const
For debugging purposes.
std::vector< const syntax::Token * > macroExpansions(FileID FID) const
Get all tokens that expand a macro in FID.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
A token coming directly from a file or from a macro invocation.
FileRange(FileID File, unsigned BeginOffset, unsigned EndOffset)
EXPECTS: File.isValid() && Begin <= End.
llvm::Optional< llvm::ArrayRef< syntax::Token > > spelledForExpanded(llvm::ArrayRef< syntax::Token > Expanded) const
Find the subrange of spelled tokens that produced the corresponding Expanded tokens.
CollectPPExpansions(TokenCollector &C)
This interface provides a way to observe the actions of the preprocessor as it does its thing...
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
One of these records is kept for each identifier that is lexed.
SourceLocation getBegin() const
unsigned getHashValue() const
Records information required to construct mappings for the token buffer that we are collecting...
llvm::ArrayRef< syntax::Token > expandedTokens() const
All tokens produced by the preprocessor after all macro replacements, directives, etc...
Token - This structure provides full information about a lexed token.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
llvm::ArrayRef< syntax::Token > Spelled
SourceLocation getComposedLoc(FileID FID, unsigned Offset) const
Form a SourceLocation from a FileID and Offset pair.
llvm::Optional< Expansion > expansionStartingAt(const syntax::Token *Spelled) const
If Spelled starts a mapping (e.g.
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
SourceLocation location() const
Location of the first character of a token.
CharSourceRange toCharRange(const SourceManager &SM) const
Convert to the clang range.
Defines the Diagnostic-related interfaces.
void setTokenWatcher(llvm::unique_function< void(const clang::Token &)> F)
Register a function that would be called on each token in the final expanded token stream...
LLVM_NODISCARD TokenBuffer consume() &&
Finalizes token collection.
Builder(std::vector< syntax::Token > Expanded, PPExpansions CollectedExpansions, const SourceManager &SM, const LangOptions &LangOpts)
Dump out preprocessed tokens.
Defines the clang::LangOptions interface.
llvm::StringRef text(const SourceManager &SM) const
Get the substring covered by the token.
Represents a character-granular source range.
Implements an efficient mapping from strings to IdentifierInfo nodes.
MacroArgs - An instance of this class captures information about the formal arguments specified to a ...
void disable()
Disabled instance will stop reporting anything to TokenCollector.
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::Preprocessor interface.
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
const syntax::Token * spelledIdentifierTouching(SourceLocation Loc, const syntax::TokenBuffer &Tokens)
The identifier token that overlaps or touches a spelling location Loc.
SourceLocation getEnd() const
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
SourceManager & getSourceManager() const
TokenCollector(Preprocessor &P)
Adds the hooks to collect the tokens.
unsigned getFileOffset(SourceLocation SpellingLoc) const
Returns the offset from the start of the file that the specified SourceLocation represents.
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
std::string dumpForTests(const SourceManager &SM) const
tok::TokenKind kind() const
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
const llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
unsigned beginOffset() const
Start is a start offset (inclusive) in the corresponding file.
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
A half-open character range inside a particular file, the start offset is included and the end offset...
FileID getMainFileID() const
Returns the FileID of the main source file.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
Collects tokens for the main file while running the frontend action.
std::vector< syntax::Token > tokenize(FileID FID, const SourceManager &SM, const LangOptions &LO)
Lex the text buffer, corresponding to FID, in raw mode and record the resulting spelled tokens...
SourceLocation getEnd() const
CharSourceRange getExpansionRange(SourceLocation Loc) const
Given a SourceLocation object, return the range of tokens covered by the expansion in the ultimate fi...
Defines the PPCallbacks interface.
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
Builds mappings and spelled tokens in the TokenBuffer based on the expanded token stream...
An expansion produced by the preprocessor, includes macro expansions and preprocessor directives...
Token(SourceLocation Location, unsigned Length, tok::TokenKind Kind)
unsigned kind
All of the diagnostics that can be emitted by the frontend.
const char * getTokenName(TokenKind Kind) LLVM_READNONE
Determines the name of a token as used within the front end.
static Decl::Kind getKind(const Decl *D)
std::string dumpForTests() const
llvm::ArrayRef< syntax::Token > spelledTokens(FileID FID) const
Lexed tokens of a file before preprocessing.
A list of tokens obtained by preprocessing a text buffer and operations to map between the expanded a...
FileRange range(const SourceManager &SM) const
Gets a range of this token.
A trivial tuple used to represent a source range.
llvm::ArrayRef< syntax::Token > Expanded
unsigned endOffset() const
End offset (exclusive) in the corresponding file.
SourceLocation getBegin() const
void addPPCallbacks(std::unique_ptr< PPCallbacks > C)
This class handles loading and caching of source files into memory.
void MacroExpands(const clang::Token &MacroNameTok, const MacroDefinition &MD, SourceRange Range, const MacroArgs *Args) override
Called by Preprocessor::HandleMacroExpandedIdentifier when a macro invocation is found.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.