21 #include "llvm/ADT/ArrayRef.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/Support/Allocator.h" 25 #include "llvm/Support/Casting.h" 26 #include "llvm/Support/Compiler.h" 27 #include "llvm/Support/FormatVariadic.h" 28 #include "llvm/Support/MemoryBuffer.h" 29 #include "llvm/Support/raw_ostream.h" 32 using namespace clang;
54 TreeBuilder(syntax::Arena &Arena) : Arena(Arena), Pending(Arena) {
55 for (
const auto &T : Arena.tokenBuffer().expandedTokens())
56 LocationToToken.insert({T.location().getRawEncoding(), &T});
/// Returns the allocator exposed by the arena this builder was constructed
/// with (simply forwards to Arena.allocator()).
59 llvm::BumpPtrAllocator &
allocator() {
return Arena.allocator(); }
/// Notifies the builder that the trailing semicolon should not be consumed
/// when computing the token range of \p D.
71 void noticeDeclaratorWithoutSemicolon(
Decl *D);
87 auto Tokens = Arena.tokenBuffer().expandedTokens();
88 assert(!Tokens.empty());
89 assert(Tokens.back().kind() ==
tok::eof);
92 Pending.foldChildren(Arena, Tokens.drop_back(),
95 auto *TU = cast<syntax::TranslationUnit>(std::move(Pending).finalize());
96 TU->assertInvariantsRecursive();
108 assert(First == Last ||
109 Arena.sourceManager().isBeforeInTranslationUnit(First, Last));
110 return llvm::makeArrayRef(findToken(First), std::next(findToken(Last)));
114 if (llvm::isa<NamespaceDecl>(D))
116 if (DeclsWithoutSemicolons.count(D))
120 return withTrailingSemicolon(Tokens);
129 if (isa<CompoundStmt>(S))
134 if (Tokens.back().kind() == tok::semi)
136 return withTrailingSemicolon(Tokens);
142 assert(!Tokens.empty());
143 assert(Tokens.back().kind() !=
tok::eof);
145 if (Tokens.back().kind() != tok::semi && Tokens.end()->kind() == tok::semi)
146 return llvm::makeArrayRef(Tokens.begin(), Tokens.end() + 1);
160 Forest(syntax::Arena &A) {
161 assert(!A.tokenBuffer().expandedTokens().empty());
162 assert(A.tokenBuffer().expandedTokens().back().kind() ==
tok::eof);
165 for (
auto &T : A.tokenBuffer().expandedTokens().drop_back()) {
168 L->CanModify = A.tokenBuffer().spelledForExpanded(T).hasValue();
169 Trees.insert(Trees.end(), {&T, NodeAndRole{L}});
/// Destructor. Asserts that no delayed folds are still pending, i.e. that
/// DelayedFolds has been fully drained before the forest is destroyed.
173 ~Forest() { assert(DelayedFolds.empty()); }
177 assert(!Range.empty());
178 auto It = Trees.lower_bound(Range.begin());
179 assert(It != Trees.end() &&
"no node found");
180 assert(It->first == Range.begin() &&
"no child with the specified range");
181 assert((std::next(It) == Trees.end() ||
182 std::next(It)->first == Range.end()) &&
183 "no child with the specified range");
184 It->second.Role = Role;
188 void foldChildren(
const syntax::Arena &A,
192 auto BeginExecuted = DelayedFolds.lower_bound(Tokens.begin());
193 auto It = BeginExecuted;
194 for (; It != DelayedFolds.end() && It->second.End <= Tokens.end(); ++It)
195 foldChildrenEager(A, llvm::makeArrayRef(It->first, It->second.End),
197 DelayedFolds.erase(BeginExecuted, It);
200 foldChildrenEager(A, Tokens, Node);
209 assert(!Tokens.empty());
211 DelayedFolds.insert({Tokens.begin(), DelayedFold{Tokens.end(), Node}})
214 assert(Inserted &&
"Multiple delayed folds start at the same token");
221 assert(!ExtendedRange.empty());
222 auto It = DelayedFolds.find(ExtendedRange.data());
223 if (It == DelayedFolds.end())
225 assert(It->second.End <= ExtendedRange.end());
226 It->second.End = ExtendedRange.end();
232 assert(Trees.size() == 1);
233 auto *Root = Trees.begin()->second.Node;
238 std::string str(
const syntax::Arena &A)
const {
240 for (
auto It = Trees.begin(); It != Trees.end(); ++It) {
241 unsigned CoveredTokens =
243 ? (std::next(It)->first - It->first)
244 : A.tokenBuffer().expandedTokens().end() - It->first;
246 R += llvm::formatv(
"- '{0}' covers '{1}'+{2} tokens\n",
247 It->second.Node->kind(),
248 It->first->text(A.sourceManager()), CoveredTokens);
249 R += It->second.Node->dump(A);
257 void foldChildrenEager(
const syntax::Arena &A,
260 assert(Node->firstChild() ==
nullptr &&
"node already has children");
262 auto *FirstToken = Tokens.begin();
263 auto BeginChildren = Trees.lower_bound(FirstToken);
264 assert((BeginChildren == Trees.end() ||
265 BeginChildren->first == FirstToken) &&
266 "fold crosses boundaries of existing subtrees");
267 auto EndChildren = Trees.lower_bound(Tokens.end());
269 (EndChildren == Trees.end() || EndChildren->first == Tokens.end()) &&
270 "fold crosses boundaries of existing subtrees");
273 for (
auto It = EndChildren; It != BeginChildren; --It)
274 Node->prependChildLowLevel(std::prev(It)->second.Node,
275 std::prev(It)->second.Role);
278 Node->Original =
true;
279 Node->CanModify = A.tokenBuffer().spelledForExpanded(Tokens).hasValue();
281 Trees.erase(BeginChildren, EndChildren);
282 Trees.insert({FirstToken, NodeAndRole(Node)});
/// Maps a token pointer to the node (and its role) stored under that token.
/// The constructor inserts one entry per expanded token other than the
/// trailing eof; an eager fold erases a run of entries and inserts a single
/// entry keyed by the first token of the folded range.
298 std::map<const syntax::Token *, NodeAndRole> Trees;
302 const syntax::Token *
End =
nullptr;
/// Folds that have been requested but not yet executed, keyed by the first
/// token of their range. Insertion asserts that no other delayed fold starts
/// at the same token; the destructor asserts that this map is empty.
305 std::map<const syntax::Token *, DelayedFold> DelayedFolds;
/// Returns the string produced by Pending.str(Arena), i.e. a textual
/// description of the pending nodes.
309 std::string str() {
return Pending.str(Arena); }
311 syntax::Arena &Arena;
313 llvm::DenseMap< unsigned,
const syntax::Token *>
/// Always returns true.
/// NOTE(review): presumably queried by the AST visitor base class to select
/// post-order traversal -- confirm against the enclosing class.
325 bool shouldTraversePostOrder()
const {
return true; }
340 bool VisitDecl(
Decl *D) {
347 bool WalkUpFromTagDecl(
TagDecl *C) {
368 for (
auto *Child : S->
body())
378 bool WalkUpFromStmt(
Stmt *S) {
400 bool TraverseStmt(
Stmt *S) {
401 if (
auto *DS = llvm::dyn_cast_or_null<DeclStmt>(S)) {
403 for (
auto *D : DS->decls())
405 }
else if (
auto *E = llvm::dyn_cast_or_null<Expr>(S)) {
409 return WalkUpFromExpr(E->IgnoreImplicit());
415 bool WalkUpFromExpr(
Expr *E) {
416 assert(!
isImplicitExpr(E) &&
"should be handled by TraverseStmt");
424 if (Tokens.front().kind() == tok::coloncolon) {
437 bool WalkUpFromDeclStmt(
DeclStmt *S) {
443 bool WalkUpFromNullStmt(
NullStmt *S) {
458 bool WalkUpFromCaseStmt(
CaseStmt *S) {
477 bool WalkUpFromIfStmt(
IfStmt *S) {
490 bool WalkUpFromForStmt(
ForStmt *S) {
/// Convenience accessor: returns the allocator provided by Builder
/// (forwards to Builder.allocator()).
601 llvm::BumpPtrAllocator &allocator() {
return Builder.
allocator(); }
610 Pending.foldChildren(Arena, Range, New);
615 if (Pending.extendDelayedFold(Range))
617 Pending.foldChildrenDelayed(Range,
622 DeclsWithoutSemicolons.insert(D);
628 Pending.assignRole(*findToken(Loc), Role);
635 auto Range = getStmtRange(Child);
638 if (
auto *E = dyn_cast<Expr>(Child)) {
639 Pending.assignRole(getExprRange(E),
642 Pending.foldChildren(Arena, Range,
645 Pending.assignRole(Range, Role);
652 Pending.assignRole(getExprRange(Child), Role);
655 const syntax::Token *syntax::TreeBuilder::findToken(
SourceLocation L)
const {
657 assert(It != LocationToToken.end());
663 TreeBuilder Builder(A);
665 return std::move(Builder).finalize();
SourceLocation getRBracLoc() const
SourceLocation getForLoc() const
An opening parenthesis in argument lists and blocks, e.g. '{', '(', etc.
SourceLocation getForLoc() const
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
Children of an unknown semantic nature, e.g. skipped tokens, comments.
void markChildToken(SourceLocation Loc, NodeRole R)
Set role for a token starting at Loc.
Stmt - This represents one statement.
IfStmt - This represents an if/then/else.
Defines the SourceManager interface.
Decl - This represents one declaration (or definition), e.g.
SourceLocation getBeginLoc() const LLVM_READONLY
static LLVM_ATTRIBUTE_UNUSED bool isImplicitExpr(clang::Expr *E)
Represents an empty-declaration.
syntax::TranslationUnit * buildSyntaxTree(Arena &A, const clang::TranslationUnitDecl &TU)
Build a syntax tree for the main file.
Represent a C++ namespace.
SourceLocation getEndLoc() const LLVM_READONLY
void finalize(TemplateInstantiationCallbackPtrs &Callbacks, const Sema &TheSema)
void foldNode(llvm::ArrayRef< syntax::Token > Range, syntax::Tree *New)
Populate children for New node, assuming it covers tokens from Range.
SourceLocation getIfLoc() const
llvm::BumpPtrAllocator & allocator()
llvm::ArrayRef< syntax::Token > getExprRange(const Expr *E) const
StringLiteral * getMessage()
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
SourceLocation getBeginLoc() const LLVM_READONLY
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
An inner statement for those that have only a single child of kind statement, e.g.
Represents a C++ using-declaration.
ForStmt - This represents a 'for (init;cond;inc)' stmt.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getLBracLoc() const
ASTContext & getASTContext() const
CXXForRangeStmt - This represents C++0x [stmt.ranged]'s ranged for statement, represented as 'for (ra...
CaseStmt - Represent a case statement.
SourceLocation getContinueLoc() const
A class that does preorder or postorder depth-first traversal on the entire Clang AST and visits each...
Represents a linkage specification.
Represents the declaration of a typedef-name via a C++11 alias-declaration.
CompoundStmt - This represents a group of statements like { stmt stmt }.
Represents a ValueDecl that came out of a declarator.
A memory arena for syntax trees.
A keyword that introduces some grammar construct, e.g. 'if', 'try', etc.
This represents one expression.
SourceLocation getElseLoc() const
SourceLocation getWhileLoc() const
bool isImplicit() const
isImplicit - Indicates whether the declaration was implicitly generated by the implementation.
SourceLocation getSwitchLoc() const
ReturnStmt - This represents a return, optionally of an expression: return; return 4;...
void markStmtChild(Stmt *Child, NodeRole Role)
Mark the Child node with a corresponding Role.
void markExprChild(Expr *Child, NodeRole Role)
Should be called for expressions in non-statement position to avoid wrapping into expression statemen...
A closing parenthesis in argument lists and blocks, e.g. '}', ')', etc.
SourceLocation getEndLoc() const LLVM_READONLY
Encodes a location in the source.
A helper class for constructing the syntax tree while traversing a clang AST.
Represents the declaration of a struct/union/class/enum.
DeclStmt - Adaptor class for mixing declarations with statements and expressions. ...
Represents a dependent using declaration which was not marked with typename.
void noticeDeclaratorWithoutSemicolon(Decl *D)
Notifies that we should not consume trailing semicolon when computing token range of D...
bool TraverseStmt(Stmt *S, DataRecursionQueue *Queue=nullptr)
Recursively visit a statement or expression, by dispatching to Traverse*() based on the argument's dy...
syntax::TranslationUnit * finalize() &&
Finish building the tree and consume the root node.
Represents a C++11 static_assert declaration.
SourceLocation getKeywordLoc() const
Base class for declarations which introduce a typedef-name.
ast_type_traits::DynTypedNode Node
NodeRole
A relation between a parent and child node, e.g.
NullStmt - This is the null statement ";": C99 6.8.3p3.
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
Represents a dependent using declaration which was marked with typename.
llvm::ArrayRef< syntax::Token > getStmtRange(const Stmt *S) const
Find the adjusted range for the statement, consuming the trailing semicolon when needed.
SwitchStmt - This represents a 'switch' stmt.
Expr * IgnoreImplicit() LLVM_READONLY
Skip past any implicit AST nodes which might surround this expression until reaching a fixed point...
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
ContinueStmt - This represents a continue.
VarDecl * getLoopVariable()
WhileStmt - This represents a 'while' stmt.
SourceLocation getBreakLoc() const
llvm::ArrayRef< syntax::Token > getRange(const Decl *D) const
The top declaration context.
BreakStmt - This represents a break.
TreeBuilder(syntax::Arena &Arena)
Represents a C++ namespace alias.
Represents C++ using-directive.
bool isFreeStanding() const
True if this tag is free standing, e.g. "struct foo;".
const LangOptions & getLangOpts() const
llvm::ArrayRef< syntax::Token > getRange(SourceLocation First, SourceLocation Last) const
getRange() finds the syntax tokens corresponding to the passed source locations.
void noticeDeclaratorRange(llvm::ArrayRef< syntax::Token > Range)
Must be called with the range of each DeclaratorDecl.
SourceLocation getReturnLoc() const