clang  10.0.0git
FormatToken.h
Go to the documentation of this file.
1 //===--- FormatToken.h - Format C++ code ------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 ///
9 /// \file
10 /// This file contains the declaration of the FormatToken, a wrapper
11 /// around Token with additional information related to formatting.
12 ///
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
16 #define LLVM_CLANG_LIB_FORMAT_FORMATTOKEN_H
17 
20 #include "clang/Format/Format.h"
21 #include "clang/Lex/Lexer.h"
22 #include <memory>
23 #include <unordered_set>
24 
25 namespace clang {
26 namespace format {
27 
// List of all token type names, written as an X-macro: every entry is wrapped
// in TYPE(...). A user defines TYPE(X) before expanding LIST_TOKEN_TYPES and
// undefines it afterwards; the TokenType enum below defines it as TT_##X so
// each entry becomes an enumerator with a TT_ prefix.
28 #define LIST_TOKEN_TYPES \
29  TYPE(ArrayInitializerLSquare) \
30  TYPE(ArraySubscriptLSquare) \
31  TYPE(AttributeColon) \
32  TYPE(AttributeParen) \
33  TYPE(AttributeSquare) \
34  TYPE(BinaryOperator) \
35  TYPE(BitFieldColon) \
36  TYPE(BlockComment) \
37  TYPE(CastRParen) \
38  TYPE(ConditionalExpr) \
39  TYPE(ConflictAlternative) \
40  TYPE(ConflictEnd) \
41  TYPE(ConflictStart) \
42  TYPE(CtorInitializerColon) \
43  TYPE(CtorInitializerComma) \
44  TYPE(DesignatedInitializerLSquare) \
45  TYPE(DesignatedInitializerPeriod) \
46  TYPE(DictLiteral) \
47  TYPE(ForEachMacro) \
48  TYPE(FunctionAnnotationRParen) \
49  TYPE(FunctionDeclarationName) \
50  TYPE(FunctionLBrace) \
51  TYPE(FunctionTypeLParen) \
52  TYPE(ImplicitStringLiteral) \
53  TYPE(InheritanceColon) \
54  TYPE(InheritanceComma) \
55  TYPE(InlineASMBrace) \
56  TYPE(InlineASMColon) \
57  TYPE(JavaAnnotation) \
58  TYPE(JsComputedPropertyName) \
59  TYPE(JsExponentiation) \
60  TYPE(JsExponentiationEqual) \
61  TYPE(JsFatArrow) \
62  TYPE(JsNonNullAssertion) \
63  TYPE(JsNullishCoalescingOperator) \
64  TYPE(JsNullPropagatingOperator) \
65  TYPE(JsPrivateIdentifier) \
66  TYPE(JsTypeColon) \
67  TYPE(JsTypeOperator) \
68  TYPE(JsTypeOptionalQuestion) \
69  TYPE(LambdaArrow) \
70  TYPE(LambdaLBrace) \
71  TYPE(LambdaLSquare) \
72  TYPE(LeadingJavaAnnotation) \
73  TYPE(LineComment) \
74  TYPE(MacroBlockBegin) \
75  TYPE(MacroBlockEnd) \
76  TYPE(NamespaceMacro) \
77  TYPE(ObjCBlockLBrace) \
78  TYPE(ObjCBlockLParen) \
79  TYPE(ObjCDecl) \
80  TYPE(ObjCForIn) \
81  TYPE(ObjCMethodExpr) \
82  TYPE(ObjCMethodSpecifier) \
83  TYPE(ObjCProperty) \
84  TYPE(ObjCStringLiteral) \
85  TYPE(OverloadedOperator) \
86  TYPE(OverloadedOperatorLParen) \
87  TYPE(PointerOrReference) \
88  TYPE(PureVirtualSpecifier) \
89  TYPE(RangeBasedForLoopColon) \
90  TYPE(RegexLiteral) \
91  TYPE(SelectorName) \
92  TYPE(StartOfName) \
93  TYPE(StatementMacro) \
94  TYPE(StructuredBindingLSquare) \
95  TYPE(TemplateCloser) \
96  TYPE(TemplateOpener) \
97  TYPE(TemplateString) \
98  TYPE(ProtoExtensionLSquare) \
99  TYPE(TrailingAnnotation) \
100  TYPE(TrailingReturnArrow) \
101  TYPE(TrailingUnaryOperator) \
102  TYPE(TypenameMacro) \
103  TYPE(UnaryOperator) \
104  TYPE(CSharpStringLiteral) \
105  TYPE(CSharpNullCoalescing) \
106  TYPE(Unknown)
107 
108 enum TokenType {
109 #define TYPE(X) TT_##X,
111 #undef TYPE
113 };
114 
115 /// Determines the name of a token type.
116 const char *getTokenTypeName(TokenType Type);
117 
118 // Represents what type of block a set of braces open.
120 
121 // The packing kind of a function's parameters.
123 
125 
126 class TokenRole;
127 class AnnotatedLine;
128 
129 /// A wrapper around a \c Token storing information about the
130 /// whitespace characters preceding it.
131 struct FormatToken {
133 
134  /// The \c Token.
136 
137  /// The number of newlines immediately before the \c Token.
138  ///
139  /// This can be used to determine what the user wrote in the original code
140  /// and thereby e.g. leave an empty line between two function definitions.
141  unsigned NewlinesBefore = 0;
142 
143  /// Whether there is at least one unescaped newline before the \c
144  /// Token.
145  bool HasUnescapedNewline = false;
146 
147  /// The range of the whitespace immediately preceding the \c Token.
149 
150  /// The offset just past the last '\n' in this token's leading
151  /// whitespace (relative to \c WhiteSpaceStart). 0 if there is no '\n'.
152  unsigned LastNewlineOffset = 0;
153 
154  /// The width of the non-whitespace parts of the token (or its first
155  /// line for multi-line tokens) in columns.
156  /// We need this to correctly measure number of columns a token spans.
157  unsigned ColumnWidth = 0;
158 
159  /// Contains the width in columns of the last line of a multi-line
160  /// token.
161  unsigned LastLineColumnWidth = 0;
162 
163  /// Whether the token text contains newlines (escaped or not).
164  bool IsMultiline = false;
165 
166  /// Indicates that this is the first token of the file.
167  bool IsFirst = false;
168 
169  /// Whether there must be a line break before this token.
170  ///
171  /// This happens for example when a preprocessor directive ended directly
172  /// before the token.
173  bool MustBreakBefore = false;
174 
175  /// The raw text of the token.
176  ///
177  /// Contains the raw token text without leading whitespace and without leading
178  /// escaped newlines.
179  StringRef TokenText;
180 
181  /// Set to \c true if this token is an unterminated literal.
183 
184  /// Contains the kind of block if this token is a brace.
186 
187  TokenType Type = TT_Unknown;
188 
189  /// The number of spaces that should be inserted before this token.
190  unsigned SpacesRequiredBefore = 0;
191 
192  /// \c true if it is allowed to break before this token.
193  bool CanBreakBefore = false;
194 
195  /// \c true if this is the ">" of "template<..>".
197 
198  /// Number of parameters, if this is "(", "[" or "<".
199  unsigned ParameterCount = 0;
200 
201  /// Number of parameters that are nested blocks,
202  /// if this is "(", "[" or "<".
203  unsigned BlockParameterCount = 0;
204 
205  /// If this is a bracket ("<", "(", "[" or "{"), contains the kind of
206  /// the surrounding bracket.
208 
209  /// A token can have a special role that can carry extra information
210  /// about the token's formatting.
211  std::unique_ptr<TokenRole> Role;
212 
213  /// If this is an opening parenthesis, how are the parameters packed?
215 
216  /// The total length of the unwrapped line up to and including this
217  /// token.
218  unsigned TotalLength = 0;
219 
220  /// The original 0-based column of this token, including expanded tabs.
221  /// The configured TabWidth is used as tab width.
222  unsigned OriginalColumn = 0;
223 
224  /// The length of following tokens until the next natural split point,
225  /// or the next token that can be broken.
226  unsigned UnbreakableTailLength = 0;
227 
228  // FIXME: Come up with a 'cleaner' concept.
229  /// The binding strength of a token. This is a combined value of
230  /// operator precedence, parenthesis nesting, etc.
231  unsigned BindingStrength = 0;
232 
233  /// The nesting level of this token, i.e. the number of surrounding (),
234  /// [], {} or <>.
235  unsigned NestingLevel = 0;
236 
237  /// The indent level of this token. Copied from the surrounding line.
238  unsigned IndentLevel = 0;
239 
240  /// Penalty for inserting a line break before this token.
241  unsigned SplitPenalty = 0;
242 
243  /// If this is the first ObjC selector name in an ObjC method
244  /// definition or call, this contains the length of the longest name.
245  ///
246  /// This being set to 0 means that the selectors should not be colon-aligned,
247  /// e.g. because several of them are block-type.
249 
250  /// If this is the first ObjC selector name in an ObjC method
251  /// definition or call, this contains the number of parts that the whole
252  /// selector consists of.
253  unsigned ObjCSelectorNameParts = 0;
254 
255  /// The 0-based index of the parameter/argument. For ObjC it is set
256  /// for the selector name token.
257  /// For now calculated only for ObjC.
258  unsigned ParameterIndex = 0;
259 
260  /// Stores the number of required fake parentheses and the
261  /// corresponding operator precedence.
262  ///
263  /// If multiple fake parentheses start at a token, this vector stores them in
264  /// reverse order, i.e. inner fake parenthesis first.
266  /// Insert this many fake ) after this token for correct indentation.
267  unsigned FakeRParens = 0;
268 
269  /// \c true if this token starts a binary expression, i.e. has at least
270  /// one fake l_paren with a precedence greater than prec::Unknown.
272  /// \c true if this token ends a binary expression.
273  bool EndsBinaryExpression = false;
274 
275  /// If this is an operator (or "."/"->") in a sequence of operators
276  /// with the same precedence, contains the 0-based operator index.
277  unsigned OperatorIndex = 0;
278 
279  /// If this is an operator (or "."/"->") in a sequence of operators
280  /// with the same precedence, points to the next operator.
282 
283  /// Is this token part of a \c DeclStmt defining multiple variables?
284  ///
285  /// Only set if \c Type == \c TT_StartOfName.
287 
288  /// Does this line comment continue a line comment section?
289  ///
290  /// Only set to true if \c Type == \c TT_LineComment.
292 
293  /// If this is a bracket, this points to the matching one.
295 
296  /// The previous token in the unwrapped line.
297  FormatToken *Previous = nullptr;
298 
299  /// The next token in the unwrapped line.
300  FormatToken *Next = nullptr;
301 
302  /// If this token starts a block, this contains all the unwrapped lines
303  /// in it.
305 
306  /// Stores the formatting decision for the token once it was made.
308 
309  /// If \c true, this token has been fully formatted (indented and
310  /// potentially re-formatted inside), and we do not allow further formatting
311  /// changes.
312  bool Finalized = false;
313 
314  bool is(tok::TokenKind Kind) const { return Tok.is(Kind); }
315  bool is(TokenType TT) const { return Type == TT; }
316  bool is(const IdentifierInfo *II) const {
317  return II && II == Tok.getIdentifierInfo();
318  }
319  bool is(tok::PPKeywordKind Kind) const {
320  return Tok.getIdentifierInfo() &&
322  }
323  template <typename A, typename B> bool isOneOf(A K1, B K2) const {
324  return is(K1) || is(K2);
325  }
326  template <typename A, typename B, typename... Ts>
327  bool isOneOf(A K1, B K2, Ts... Ks) const {
328  return is(K1) || isOneOf(K2, Ks...);
329  }
330  template <typename T> bool isNot(T Kind) const { return !is(Kind); }
331 
332  bool isIf(bool AllowConstexprMacro = true) const {
333  return is(tok::kw_if) || endsSequence(tok::kw_constexpr, tok::kw_if) ||
334  (endsSequence(tok::identifier, tok::kw_if) && AllowConstexprMacro);
335  }
336 
337  bool closesScopeAfterBlock() const {
338  if (BlockKind == BK_Block)
339  return true;
340  if (closesScope())
341  return Previous->closesScopeAfterBlock();
342  return false;
343  }
344 
345  /// \c true if this token starts a sequence with the given tokens in order,
346  /// following the ``Next`` pointers, ignoring comments.
347  template <typename A, typename... Ts>
348  bool startsSequence(A K1, Ts... Tokens) const {
349  return startsSequenceInternal(K1, Tokens...);
350  }
351 
352  /// \c true if this token ends a sequence with the given tokens in order,
353  /// following the ``Previous`` pointers, ignoring comments.
354  /// For example, given tokens [T1, T2, T3], the function returns true if
355  /// 3 tokens ending at this (ignoring comments) are [T3, T2, T1]. In other
356  /// words, the tokens passed to this function need to the reverse of the
357  /// order the tokens appear in code.
358  template <typename A, typename... Ts>
359  bool endsSequence(A K1, Ts... Tokens) const {
360  return endsSequenceInternal(K1, Tokens...);
361  }
362 
363  bool isStringLiteral() const { return tok::isStringLiteral(Tok.getKind()); }
364 
366  return Tok.isObjCAtKeyword(Kind);
367  }
368 
369  bool isAccessSpecifier(bool ColonRequired = true) const {
370  return isOneOf(tok::kw_public, tok::kw_protected, tok::kw_private) &&
371  (!ColonRequired || (Next && Next->is(tok::colon)));
372  }
373 
374  /// Determine whether the token is a simple-type-specifier.
375  bool isSimpleTypeSpecifier() const;
376 
377  bool isObjCAccessSpecifier() const {
378  return is(tok::at) && Next &&
379  (Next->isObjCAtKeyword(tok::objc_public) ||
380  Next->isObjCAtKeyword(tok::objc_protected) ||
381  Next->isObjCAtKeyword(tok::objc_package) ||
382  Next->isObjCAtKeyword(tok::objc_private));
383  }
384 
385  /// Returns whether \p Tok is ([{ or an opening < of a template or in
386  /// protos.
387  bool opensScope() const {
388  if (is(TT_TemplateString) && TokenText.endswith("${"))
389  return true;
390  if (is(TT_DictLiteral) && is(tok::less))
391  return true;
392  return isOneOf(tok::l_paren, tok::l_brace, tok::l_square,
393  TT_TemplateOpener);
394  }
395  /// Returns whether \p Tok is )]} or a closing > of a template or in
396  /// protos.
397  bool closesScope() const {
398  if (is(TT_TemplateString) && TokenText.startswith("}"))
399  return true;
400  if (is(TT_DictLiteral) && is(tok::greater))
401  return true;
402  return isOneOf(tok::r_paren, tok::r_brace, tok::r_square,
403  TT_TemplateCloser);
404  }
405 
406  /// Returns \c true if this is a "." or "->" accessing a member.
407  bool isMemberAccess() const {
408  return isOneOf(tok::arrow, tok::period, tok::arrowstar) &&
409  !isOneOf(TT_DesignatedInitializerPeriod, TT_TrailingReturnArrow,
410  TT_LambdaArrow, TT_LeadingJavaAnnotation);
411  }
412 
413  bool isUnaryOperator() const {
414  switch (Tok.getKind()) {
415  case tok::plus:
416  case tok::plusplus:
417  case tok::minus:
418  case tok::minusminus:
419  case tok::exclaim:
420  case tok::tilde:
421  case tok::kw_sizeof:
422  case tok::kw_alignof:
423  return true;
424  default:
425  return false;
426  }
427  }
428 
429  bool isBinaryOperator() const {
430  // Comma is a binary operator, but does not behave as such wrt. formatting.
431  return getPrecedence() > prec::Comma;
432  }
433 
434  bool isTrailingComment() const {
435  return is(tok::comment) &&
436  (is(TT_LineComment) || !Next || Next->NewlinesBefore > 0);
437  }
438 
439  /// Returns \c true if this is a keyword that can be used
440  /// like a function call (e.g. sizeof, typeid, ...).
441  bool isFunctionLikeKeyword() const {
442  switch (Tok.getKind()) {
443  case tok::kw_throw:
444  case tok::kw_typeid:
445  case tok::kw_return:
446  case tok::kw_sizeof:
447  case tok::kw_alignof:
448  case tok::kw_alignas:
449  case tok::kw_decltype:
450  case tok::kw_noexcept:
451  case tok::kw_static_assert:
452  case tok::kw___attribute:
453  return true;
454  default:
455  return false;
456  }
457  }
458 
459  /// Returns \c true if this is a string literal that's like a label,
460  /// e.g. ends with "=" or ":".
461  bool isLabelString() const {
462  if (!is(tok::string_literal))
463  return false;
464  StringRef Content = TokenText;
465  if (Content.startswith("\"") || Content.startswith("'"))
466  Content = Content.drop_front(1);
467  if (Content.endswith("\"") || Content.endswith("'"))
468  Content = Content.drop_back(1);
469  Content = Content.trim();
470  return Content.size() > 1 &&
471  (Content.back() == ':' || Content.back() == '=');
472  }
473 
474  /// Returns actual token start location without leading escaped
475  /// newlines and whitespace.
476  ///
477  /// This can be different to Tok.getLocation(), which includes leading escaped
478  /// newlines.
480  return WhitespaceRange.getEnd();
481  }
482 
484  return getBinOpPrecedence(Tok.getKind(), /*GreaterThanIsOperator=*/true,
485  /*CPlusPlus11=*/true);
486  }
487 
488  /// Returns the previous token ignoring comments.
490  FormatToken *Tok = Previous;
491  while (Tok && Tok->is(tok::comment))
492  Tok = Tok->Previous;
493  return Tok;
494  }
495 
496  /// Returns the next token ignoring comments.
498  const FormatToken *Tok = Next;
499  while (Tok && Tok->is(tok::comment))
500  Tok = Tok->Next;
501  return Tok;
502  }
503 
504  /// Returns \c true if this token starts a block-type list, i.e. a
505  /// list that should be indented with a block indent.
507  if (is(TT_TemplateString) && opensScope())
508  return true;
509  return is(TT_ArrayInitializerLSquare) || is(TT_ProtoExtensionLSquare) ||
510  (is(tok::l_brace) &&
511  (BlockKind == BK_Block || is(TT_DictLiteral) ||
512  (!Style.Cpp11BracedListStyle && NestingLevel == 0))) ||
513  (is(tok::less) && (Style.Language == FormatStyle::LK_Proto ||
514  Style.Language == FormatStyle::LK_TextProto));
515  }
516 
517  /// Returns whether the token is the left square bracket of a C++
518  /// structured binding declaration.
520  if (!Style.isCpp() || isNot(tok::l_square))
521  return false;
522  const FormatToken *T = this;
523  do {
524  T = T->getPreviousNonComment();
525  } while (T && T->isOneOf(tok::kw_const, tok::kw_volatile, tok::amp,
526  tok::ampamp));
527  return T && T->is(tok::kw_auto);
528  }
529 
530  /// Same as opensBlockOrBlockTypeList, but for the closing token.
532  if (is(TT_TemplateString) && closesScope())
533  return true;
534  return MatchingParen && MatchingParen->opensBlockOrBlockTypeList(Style);
535  }
536 
537  /// Return the actual namespace token, if this token starts a namespace
538  /// block.
540  const FormatToken *NamespaceTok = this;
541  if (is(tok::comment))
542  NamespaceTok = NamespaceTok->getNextNonComment();
543  // Detect "(inline|export)? namespace" in the beginning of a line.
544  if (NamespaceTok && NamespaceTok->isOneOf(tok::kw_inline, tok::kw_export))
545  NamespaceTok = NamespaceTok->getNextNonComment();
546  return NamespaceTok &&
547  NamespaceTok->isOneOf(tok::kw_namespace, TT_NamespaceMacro)
548  ? NamespaceTok
549  : nullptr;
550  }
551 
552 private:
553  // Disallow copying.
554  FormatToken(const FormatToken &) = delete;
555  void operator=(const FormatToken &) = delete;
556 
557  template <typename A, typename... Ts>
558  bool startsSequenceInternal(A K1, Ts... Tokens) const {
559  if (is(tok::comment) && Next)
560  return Next->startsSequenceInternal(K1, Tokens...);
561  return is(K1) && Next && Next->startsSequenceInternal(Tokens...);
562  }
563 
564  template <typename A> bool startsSequenceInternal(A K1) const {
565  if (is(tok::comment) && Next)
566  return Next->startsSequenceInternal(K1);
567  return is(K1);
568  }
569 
570  template <typename A, typename... Ts> bool endsSequenceInternal(A K1) const {
571  if (is(tok::comment) && Previous)
572  return Previous->endsSequenceInternal(K1);
573  return is(K1);
574  }
575 
576  template <typename A, typename... Ts>
577  bool endsSequenceInternal(A K1, Ts... Tokens) const {
578  if (is(tok::comment) && Previous)
579  return Previous->endsSequenceInternal(K1, Tokens...);
580  return is(K1) && Previous && Previous->endsSequenceInternal(Tokens...);
581  }
582 };
583 
585 struct LineState;
586 
587 class TokenRole {
588 public:
589  TokenRole(const FormatStyle &Style) : Style(Style) {}
590  virtual ~TokenRole();
591 
592  /// After the \c TokenAnnotator has finished annotating all the tokens,
593  /// this function precomputes required information for formatting.
594  virtual void precomputeFormattingInfos(const FormatToken *Token);
595 
596  /// Apply the special formatting that the given role demands.
597  ///
598  /// Assumes that the token having this role is already formatted.
599  ///
600  /// Continues formatting from \p State leaving indentation to \p Indenter and
601  /// returns the total penalty that this formatting incurs.
602  virtual unsigned formatFromToken(LineState &State,
604  bool DryRun) {
605  return 0;
606  }
607 
608  /// Same as \c formatFromToken, but assumes that the first token has
609  /// already been set thereby deciding on the first line break.
610  virtual unsigned formatAfterToken(LineState &State,
612  bool DryRun) {
613  return 0;
614  }
615 
616  /// Notifies the \c Role that a comma was found.
617  virtual void CommaFound(const FormatToken *Token) {}
618 
619  virtual const FormatToken *lastComma() { return nullptr; }
620 
621 protected:
623 };
624 
626 public:
628  : TokenRole(Style), HasNestedBracedList(false) {}
629 
630  void precomputeFormattingInfos(const FormatToken *Token) override;
631 
632  unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter,
633  bool DryRun) override;
634 
635  unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter,
636  bool DryRun) override;
637 
638  /// Adds \p Token as the next comma to the \c CommaSeparated list.
639  void CommaFound(const FormatToken *Token) override {
640  Commas.push_back(Token);
641  }
642 
643  const FormatToken *lastComma() override {
644  if (Commas.empty())
645  return nullptr;
646  return Commas.back();
647  }
648 
649 private:
650  /// A struct that holds information on how to format a given list with
651  /// a specific number of columns.
652  struct ColumnFormat {
653  /// The number of columns to use.
654  unsigned Columns;
655 
656  /// The total width in characters.
657  unsigned TotalWidth;
658 
659  /// The number of lines required for this format.
660  unsigned LineCount;
661 
662  /// The size of each column in characters.
663  SmallVector<unsigned, 8> ColumnSizes;
664  };
665 
666  /// Calculate which \c ColumnFormat fits best into
667  /// \p RemainingCharacters.
668  const ColumnFormat *getColumnFormat(unsigned RemainingCharacters) const;
669 
670  /// The ordered \c FormatTokens making up the commas of this list.
672 
673  /// The length of each of the list's items in characters including the
674  /// trailing comma.
675  SmallVector<unsigned, 8> ItemLengths;
676 
677  /// Precomputed formats that can be used for this list.
679 
680  bool HasNestedBracedList;
681 };
682 
683 /// Encapsulates keywords that are context sensitive or for languages not
684 /// properly supported by Clang's lexer.
687  kw_final = &IdentTable.get("final");
688  kw_override = &IdentTable.get("override");
689  kw_in = &IdentTable.get("in");
690  kw_of = &IdentTable.get("of");
691  kw_CF_CLOSED_ENUM = &IdentTable.get("CF_CLOSED_ENUM");
692  kw_CF_ENUM = &IdentTable.get("CF_ENUM");
693  kw_CF_OPTIONS = &IdentTable.get("CF_OPTIONS");
694  kw_NS_CLOSED_ENUM = &IdentTable.get("NS_CLOSED_ENUM");
695  kw_NS_ENUM = &IdentTable.get("NS_ENUM");
696  kw_NS_OPTIONS = &IdentTable.get("NS_OPTIONS");
697 
698  kw_as = &IdentTable.get("as");
699  kw_async = &IdentTable.get("async");
700  kw_await = &IdentTable.get("await");
701  kw_declare = &IdentTable.get("declare");
702  kw_finally = &IdentTable.get("finally");
703  kw_from = &IdentTable.get("from");
704  kw_function = &IdentTable.get("function");
705  kw_get = &IdentTable.get("get");
706  kw_import = &IdentTable.get("import");
707  kw_infer = &IdentTable.get("infer");
708  kw_is = &IdentTable.get("is");
709  kw_let = &IdentTable.get("let");
710  kw_module = &IdentTable.get("module");
711  kw_readonly = &IdentTable.get("readonly");
712  kw_set = &IdentTable.get("set");
713  kw_type = &IdentTable.get("type");
714  kw_typeof = &IdentTable.get("typeof");
715  kw_var = &IdentTable.get("var");
716  kw_yield = &IdentTable.get("yield");
717 
718  kw_abstract = &IdentTable.get("abstract");
719  kw_assert = &IdentTable.get("assert");
720  kw_extends = &IdentTable.get("extends");
721  kw_implements = &IdentTable.get("implements");
722  kw_instanceof = &IdentTable.get("instanceof");
723  kw_interface = &IdentTable.get("interface");
724  kw_native = &IdentTable.get("native");
725  kw_package = &IdentTable.get("package");
726  kw_synchronized = &IdentTable.get("synchronized");
727  kw_throws = &IdentTable.get("throws");
728  kw___except = &IdentTable.get("__except");
729  kw___has_include = &IdentTable.get("__has_include");
730  kw___has_include_next = &IdentTable.get("__has_include_next");
731 
732  kw_mark = &IdentTable.get("mark");
733 
734  kw_extend = &IdentTable.get("extend");
735  kw_option = &IdentTable.get("option");
736  kw_optional = &IdentTable.get("optional");
737  kw_repeated = &IdentTable.get("repeated");
738  kw_required = &IdentTable.get("required");
739  kw_returns = &IdentTable.get("returns");
740 
741  kw_signals = &IdentTable.get("signals");
742  kw_qsignals = &IdentTable.get("Q_SIGNALS");
743  kw_slots = &IdentTable.get("slots");
744  kw_qslots = &IdentTable.get("Q_SLOTS");
745 
746  // C# keywords
747  kw_dollar = &IdentTable.get("dollar");
748  kw_base = &IdentTable.get("base");
749  kw_byte = &IdentTable.get("byte");
750  kw_checked = &IdentTable.get("checked");
751  kw_decimal = &IdentTable.get("decimal");
752  kw_delegate = &IdentTable.get("delegate");
753  kw_event = &IdentTable.get("event");
754  kw_fixed = &IdentTable.get("fixed");
755  kw_foreach = &IdentTable.get("foreach");
756  kw_implicit = &IdentTable.get("implicit");
757  kw_internal = &IdentTable.get("internal");
758  kw_lock = &IdentTable.get("lock");
759  kw_null = &IdentTable.get("null");
760  kw_object = &IdentTable.get("object");
761  kw_out = &IdentTable.get("out");
762  kw_params = &IdentTable.get("params");
763  kw_ref = &IdentTable.get("ref");
764  kw_string = &IdentTable.get("string");
765  kw_stackalloc = &IdentTable.get("stackalloc");
766  kw_sbyte = &IdentTable.get("sbyte");
767  kw_sealed = &IdentTable.get("sealed");
768  kw_uint = &IdentTable.get("uint");
769  kw_ulong = &IdentTable.get("ulong");
770  kw_unchecked = &IdentTable.get("unchecked");
771  kw_unsafe = &IdentTable.get("unsafe");
772  kw_ushort = &IdentTable.get("ushort");
773 
774  // Keep this at the end of the constructor to make sure everything here
775  // is
776  // already initialized.
777  JsExtraKeywords = std::unordered_set<IdentifierInfo *>(
778  {kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
779  kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
780  kw_set, kw_type, kw_typeof, kw_var, kw_yield,
781  // Keywords from the Java section.
782  kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
783 
784  CSharpExtraKeywords = std::unordered_set<IdentifierInfo *>(
785  {kw_base, kw_byte, kw_checked, kw_decimal, kw_delegate, kw_event,
786  kw_fixed, kw_foreach, kw_implicit, kw_in, kw_interface, kw_internal,
787  kw_is, kw_lock, kw_null, kw_object, kw_out, kw_override, kw_params,
788  kw_readonly, kw_ref, kw_string, kw_stackalloc, kw_sbyte, kw_sealed,
789  kw_uint, kw_ulong, kw_unchecked, kw_unsafe, kw_ushort,
790  // Keywords from the JavaScript section.
791  kw_as, kw_async, kw_await, kw_declare, kw_finally, kw_from,
792  kw_function, kw_get, kw_import, kw_is, kw_let, kw_module, kw_readonly,
793  kw_set, kw_type, kw_typeof, kw_var, kw_yield,
794  // Keywords from the Java section.
795  kw_abstract, kw_extends, kw_implements, kw_instanceof, kw_interface});
796  }
797 
798  // Context sensitive keywords.
812 
813  // JavaScript keywords.
833 
834  // Java keywords.
845 
846  // Pragma keywords.
848 
849  // Proto keywords.
856 
857  // QT keywords.
862 
863  // C# keywords
875 
880 
882 
893 
894  /// Returns \c true if \p Tok is a true JavaScript identifier, returns
895  /// \c false if it is a keyword or a pseudo keyword.
897  return Tok.is(tok::identifier) &&
898  JsExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
899  JsExtraKeywords.end();
900  }
901 
902  /// Returns \c true if \p Tok is a C# keyword, returns
903  /// \c false if it is a anything else.
904  bool isCSharpKeyword(const FormatToken &Tok) const {
905  switch (Tok.Tok.getKind()) {
906  case tok::kw_bool:
907  case tok::kw_break:
908  case tok::kw_case:
909  case tok::kw_catch:
910  case tok::kw_char:
911  case tok::kw_class:
912  case tok::kw_const:
913  case tok::kw_continue:
914  case tok::kw_default:
915  case tok::kw_do:
916  case tok::kw_double:
917  case tok::kw_else:
918  case tok::kw_enum:
919  case tok::kw_explicit:
920  case tok::kw_extern:
921  case tok::kw_false:
922  case tok::kw_float:
923  case tok::kw_for:
924  case tok::kw_goto:
925  case tok::kw_if:
926  case tok::kw_int:
927  case tok::kw_long:
928  case tok::kw_namespace:
929  case tok::kw_new:
930  case tok::kw_operator:
931  case tok::kw_private:
932  case tok::kw_protected:
933  case tok::kw_public:
934  case tok::kw_return:
935  case tok::kw_short:
936  case tok::kw_sizeof:
937  case tok::kw_static:
938  case tok::kw_struct:
939  case tok::kw_switch:
940  case tok::kw_this:
941  case tok::kw_throw:
942  case tok::kw_true:
943  case tok::kw_try:
944  case tok::kw_typeof:
945  case tok::kw_using:
946  case tok::kw_virtual:
947  case tok::kw_void:
948  case tok::kw_volatile:
949  case tok::kw_while:
950  return true;
951  default:
952  return Tok.is(tok::identifier) &&
953  CSharpExtraKeywords.find(Tok.Tok.getIdentifierInfo()) ==
954  CSharpExtraKeywords.end();
955  }
956  }
957 
958 private:
959  /// The JavaScript keywords beyond the C++ keyword set.
960  std::unordered_set<IdentifierInfo *> JsExtraKeywords;
961 
962  /// The C# keywords beyond the C++ keyword set
963  std::unordered_set<IdentifierInfo *> CSharpExtraKeywords;
964 };
965 
966 } // namespace format
967 } // namespace clang
968 
969 #endif
bool endsSequence(A K1, Ts... Tokens) const
true if this token ends a sequence with the given tokens in order, following the Previous pointers...
Definition: FormatToken.h:359
unsigned NestingLevel
The nesting level of this token, i.e.
Definition: FormatToken.h:235
Token Tok
The Token.
Definition: FormatToken.h:135
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Definition: Token.h:97
CommaSeparatedList(const FormatStyle &Style)
Definition: FormatToken.h:627
std::unique_ptr< TokenRole > Role
A token can have a special role that can carry extra information about the token's formatting...
Definition: FormatToken.h:211
unsigned OriginalColumn
The original 0-based column of this token, including expanded tabs.
Definition: FormatToken.h:222
bool isMemberAccess() const
Returns true if this is a "." or "->" accessing a member.
Definition: FormatToken.h:407
bool isFunctionLikeKeyword() const
Returns true if this is a keyword that can be used like a function call (e.g.
Definition: FormatToken.h:441
The base class of the type hierarchy.
Definition: Type.h:1450
bool isUnaryOperator() const
Definition: FormatToken.h:413
const FormatToken * getNextNonComment() const
Returns the next token ignoring comments.
Definition: FormatToken.h:497
const FormatToken * getNamespaceToken() const
Return the actual namespace token, if this token starts a namespace block.
Definition: FormatToken.h:539
bool IsMultiline
Whether the token text contains newlines (escaped or not).
Definition: FormatToken.h:164
bool IsFirst
Indicates that this is the first token of the file.
Definition: FormatToken.h:167
unsigned ObjCSelectorNameParts
If this is the first ObjC selector name in an ObjC method definition or call, this contains the numbe...
Definition: FormatToken.h:253
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
Definition: TokenKinds.h:77
bool isAccessSpecifier(bool ColonRequired=true) const
Definition: FormatToken.h:369
bool EndsBinaryExpression
true if this token ends a binary expression.
Definition: FormatToken.h:273
unsigned TotalLength
The total length of the unwrapped line up to and including this token.
Definition: FormatToken.h:218
bool isBinaryOperator() const
Definition: FormatToken.h:429
bool isIf(bool AllowConstexprMacro=true) const
Definition: FormatToken.h:332
unsigned NewlinesBefore
The number of newlines immediately before the Token.
Definition: FormatToken.h:141
FormatToken * Next
The next token in the unwrapped line.
Definition: FormatToken.h:300
tok::TokenKind getKind() const
Definition: Token.h:92
unsigned UnbreakableTailLength
The length of following tokens until the next natural split point, or the next token that can be brok...
Definition: FormatToken.h:226
bool closesScope() const
Returns whether Tok is )]} or a closing > of a template or in protos.
Definition: FormatToken.h:397
unsigned SplitPenalty
Penalty for inserting a line break before this token.
Definition: FormatToken.h:241
prec::Level getPrecedence() const
Definition: FormatToken.h:483
One of these records is kept for each identifier that is lexed.
unsigned ParameterCount
Number of parameters, if this is "(", "[" or "<".
Definition: FormatToken.h:199
IdentifierInfo * kw_NS_CLOSED_ENUM
Definition: FormatToken.h:806
unsigned FakeRParens
Insert this many fake ) after this token for correct indentation.
Definition: FormatToken.h:267
IdentifierInfo * kw_CF_CLOSED_ENUM
Definition: FormatToken.h:803
LineState State
bool CanBreakBefore
true if it is allowed to break before this token.
Definition: FormatToken.h:193
FormatToken * Previous
The previous token in the unwrapped line.
Definition: FormatToken.h:297
AdditionalKeywords(IdentifierTable &IdentTable)
Definition: FormatToken.h:686
bool StartsBinaryExpression
true if this token starts a binary expression, i.e.
Definition: FormatToken.h:271
Token - This structure provides full information about a lexed token.
Definition: Token.h:34
unsigned LongestObjCSelectorName
If this is the first ObjC selector name in an ObjC method definition or call, this contains the lengt...
Definition: FormatToken.h:248
unsigned OperatorIndex
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, contains the 0-based operator index.
Definition: FormatToken.h:277
bool IsJavaScriptIdentifier(const FormatToken &Tok) const
Returns true if Tok is a true JavaScript identifier, returns false if it is a keyword or a pseudo key...
Definition: FormatToken.h:896
unsigned SpacesRequiredBefore
The number of spaces that should be inserted before this token.
Definition: FormatToken.h:190
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
Definition: Lexer.cpp:57
bool isNot(T Kind) const
Definition: FormatToken.h:330
bool closesBlockOrBlockTypeList(const FormatStyle &Style) const
Same as opensBlockOrBlockTypeList, but for the closing token.
Definition: FormatToken.h:531
unsigned BlockParameterCount
Number of parameters that are nested blocks, if this is "(", "[" or "<".
Definition: FormatToken.h:203
void CommaFound(const FormatToken *Token) override
Adds Token as the next comma to the CommaSeparated list.
Definition: FormatToken.h:639
bool isCppStructuredBinding(const FormatStyle &Style) const
Returns whether the token is the left square bracket of a C++ structured binding declaration.
Definition: FormatToken.h:519
bool isCSharpKeyword(const FormatToken &Tok) const
Returns true if Tok is a C# keyword, returns false if it is anything else.
Definition: FormatToken.h:904
FormatToken * getPreviousNonComment() const
Returns the previous token ignoring comments.
Definition: FormatToken.h:489
SourceLocation getStartOfNonWhitespace() const
Returns actual token start location without leading escaped newlines and whitespace.
Definition: FormatToken.h:479
bool isLabelString() const
Returns true if this is a string literal that's like a label, e.g.
Definition: FormatToken.h:461
bool isOneOf(A K1, B K2) const
Definition: FormatToken.h:323
virtual void CommaFound(const FormatToken *Token)
Notifies the Role that a comma was found.
Definition: FormatToken.h:617
The current state when indenting an unwrapped line.
ContinuationIndenter * Indenter
Implements an efficient mapping from strings to IdentifierInfo nodes.
ParameterPackingKind PackingKind
If this is an opening parenthesis, how are the parameters packed?
Definition: FormatToken.h:214
PPKeywordKind
Provides a namespace for preprocessor keywords which start with a '#' at the beginning of the line...
Definition: TokenKinds.h:32
IdentifierInfo * kw___has_include_next
Definition: FormatToken.h:811
A wrapper around a Token storing information about the whitespace characters preceding it...
Definition: FormatToken.h:131
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
Defines and computes precedence levels for binary/ternary operators.
SourceLocation getEnd() const
bool isObjCAccessSpecifier() const
Definition: FormatToken.h:377
bool isTrailingComment() const
Definition: FormatToken.h:434
TokenRole(const FormatStyle &Style)
Definition: FormatToken.h:589
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
Definition: TokenKinds.h:40
unsigned LastNewlineOffset
The offset just past the last '\n' in this token's leading whitespace (relative to WhiteSpaceStart)...
Definition: FormatToken.h:152
const char * getTokenTypeName(TokenType Type)
Determines the name of a token type.
Definition: FormatToken.cpp:24
#define false
Definition: stdbool.h:17
Kind
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
bool closesScopeAfterBlock() const
Definition: FormatToken.h:337
bool is(tok::TokenKind Kind) const
Definition: FormatToken.h:314
Various functions to configurably format source code.
IdentifierInfo * getIdentifierInfo() const
Definition: Token.h:179
Encapsulates keywords that are context sensitive or for languages not properly supported by Clang's l...
Definition: FormatToken.h:685
SourceRange WhitespaceRange
The range of the whitespace immediately preceding the Token.
Definition: FormatToken.h:148
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
Definition: TokenKinds.h:24
tok::TokenKind ParentBracket
If this is a bracket ("<", "(", "[" or "{"), contains the kind of the surrounding bracket...
Definition: FormatToken.h:207
bool IsUnterminatedLiteral
Set to true if this token is an unterminated literal.
Definition: FormatToken.h:182
StringRef TokenText
The raw text of the token.
Definition: FormatToken.h:179
bool is(TokenType TT) const
Definition: FormatToken.h:315
SmallVector< prec::Level, 4 > FakeLParens
Stores the number of required fake parentheses and the corresponding operator precedence.
Definition: FormatToken.h:265
bool isSimpleTypeSpecifier() const
Determine whether the token is a simple-type-specifier.
Definition: FormatToken.cpp:39
The FormatStyle is used to configure the formatting to follow specific guidelines.
Definition: Format.h:49
unsigned IndentLevel
The indent level of this token. Copied from the surrounding line.
Definition: FormatToken.h:238
Dataflow Directional Tag Classes.
bool is(const IdentifierInfo *II) const
Definition: FormatToken.h:316
unsigned ColumnWidth
The width of the non-whitespace parts of the token (or its first line for multi-line tokens) in colum...
Definition: FormatToken.h:157
bool Finalized
If true, this token has been fully formatted (indented and potentially re-formatted inside)...
Definition: FormatToken.h:312
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
virtual unsigned formatAfterToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun)
Same as formatFromToken, but assumes that the first token has already been set thereby deciding on th...
Definition: FormatToken.h:610
const FormatToken * lastComma() override
Definition: FormatToken.h:643
virtual const FormatToken * lastComma()
Definition: FormatToken.h:619
FormatToken * NextOperator
If this is an operator (or "."/"->") in a sequence of operators with the same precedence, points to the next operator.
Definition: FormatToken.h:281
bool ClosesTemplateDeclaration
true if this is the ">" of "template<..>".
Definition: FormatToken.h:196
FormatToken * MatchingParen
If this is a bracket, this points to the matching one.
Definition: FormatToken.h:294
bool isOneOf(A K1, B K2, Ts... Ks) const
Definition: FormatToken.h:327
SmallVector< AnnotatedLine *, 1 > Children
If this token starts a block, this contains all the unwrapped lines in it.
Definition: FormatToken.h:304
bool startsSequence(A K1, Ts... Tokens) const
true if this token starts a sequence with the given tokens in order, following the Next pointers...
Definition: FormatToken.h:348
bool opensBlockOrBlockTypeList(const FormatStyle &Style) const
Returns true if this token starts a block-type list, i.e.
Definition: FormatToken.h:506
bool opensScope() const
Returns whether Tok is ([{ or an opening < of a template or in protos.
Definition: FormatToken.h:387
const FormatStyle & Style
Definition: FormatToken.h:622
bool MustBreakBefore
Whether there must be a line break before this token.
Definition: FormatToken.h:173
virtual unsigned formatFromToken(LineState &State, ContinuationIndenter *Indenter, bool DryRun)
Apply the special formatting that the given role demands.
Definition: FormatToken.h:602
prec::Level getBinOpPrecedence(tok::TokenKind Kind, bool GreaterThanIsOperator, bool CPlusPlus11)
Return the precedence of the specified binary operator token.
A trivial tuple used to represent a source range.
unsigned BindingStrength
The binding strength of a token.
Definition: FormatToken.h:231
FormatDecision Decision
Stores the formatting decision for the token once it was made.
Definition: FormatToken.h:307
bool ContinuesLineCommentSection
Does this line comment continue a line comment section?
Definition: FormatToken.h:291
bool isObjCAtKeyword(tok::ObjCKeywordKind Kind) const
Definition: FormatToken.h:365
bool isStringLiteral() const
Definition: FormatToken.h:363
bool HasUnescapedNewline
Whether there is at least one unescaped newline before the Token.
Definition: FormatToken.h:145
BraceBlockKind BlockKind
Contains the kind of block if this token is a brace.
Definition: FormatToken.h:185
bool PartOfMultiVariableDeclStmt
Is this token part of a DeclStmt defining multiple variables?
Definition: FormatToken.h:286
unsigned ParameterIndex
The 0-based index of the parameter/argument.
Definition: FormatToken.h:258
unsigned LastLineColumnWidth
Contains the width in columns of the last line of a multi-line token.
Definition: FormatToken.h:161
bool is(tok::PPKeywordKind Kind) const
Definition: FormatToken.h:319
const FormatStyle & Style
#define LIST_TOKEN_TYPES
Definition: FormatToken.h:28