14 #ifndef LLVM_CLANG_LEX_LEXER_H 15 #define LLVM_CLANG_LEX_LEXER_H 22 #include "llvm/ADT/Optional.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/ADT/StringRef.h" 37 class DiagnosticBuilder;
70 : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
80 void anchor()
override;
86 const char *BufferStart;
89 const char *BufferEnd;
111 unsigned char ExtendedTokenMode;
120 const char *BufferPtr;
124 bool IsAtStartOfLine;
126 bool IsAtPhysicalStartOfLine;
128 bool HasLeadingSpace;
130 bool HasLeadingEmptyMacro;
135 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
148 const char *BufStart,
const char *BufPtr,
const char *BufEnd);
153 Lexer(
FileID FID,
const llvm::MemoryBuffer *InputBuffer,
180 bool Lex(
Token &Result);
/// IndirectLex - Lex a token into \p Result by forwarding directly to Lex().
/// Lex() returns a bool, but this override returns void, so that value is
/// discarded here.
189 void IndirectLex(
Token &Result)
override { Lex(Result); }
196 assert(LexingRawMode &&
"Not already in raw mode!");
200 return BufferPtr == BufferEnd;
208 return ExtendedTokenMode > 1;
214 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
215 "Can only retain whitespace in raw mode or -traditional-cpp");
216 ExtendedTokenMode = Val ? 2 : 0;
222 return ExtendedTokenMode > 0;
229 assert(!isKeepWhitespaceMode() &&
230 "Can't play with comment retention state when retaining whitespace");
231 ExtendedTokenMode = Mode ? 1 : 0;
240 void resetExtendedTokenMode();
244 return StringRef(BufferStart, BufferEnd - BufferStart);
258 SourceLocation getSourceLocation(
const char *Loc,
unsigned TokLen = 1)
const;
263 return getSourceLocation(BufferPtr);
272 static std::string Stringify(StringRef Str,
bool Charify =
false);
288 static unsigned getSpelling(
const Token &
Tok,
const char *&Buffer,
291 bool *Invalid =
nullptr);
298 static std::string getSpelling(
const Token &Tok,
301 bool *Invalid =
nullptr);
315 bool *invalid =
nullptr);
330 bool IgnoreWhiteSpace =
false);
375 : CharSourceRange::getCharRange(
440 bool *Invalid =
nullptr);
470 static StringRef getImmediateMacroNameForDiagnostics(
490 unsigned MaxLines = 0);
508 bool SkipTrailingWhitespaceAndNewLine);
511 static bool isIdentifierBodyChar(
char c,
const LangOptions &LangOpts);
515 static bool isNewLineEscaped(
const char *BufferStart,
const char *Str);
523 if (isObviouslySimpleCharacter(Ptr[0])) {
529 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
544 bool LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine);
546 bool CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
const char *CurPtr);
551 bool LexUnicode(
Token &Result, uint32_t C,
const char *CurPtr);
558 void FormTokenWithChars(
Token &Result,
const char *TokEnd,
560 unsigned TokLen = TokEnd-BufferPtr;
562 Result.
setLocation(getSourceLocation(BufferPtr, TokLen));
570 unsigned isNextPPTokenLParen();
594 static bool isObviouslySimpleCharacter(
char C) {
595 return C !=
'?' && C !=
'\\';
602 inline char getAndAdvanceChar(
const char *&Ptr,
Token &Tok) {
605 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
608 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
617 const char *ConsumeChar(
const char *Ptr,
unsigned Size,
Token &Tok) {
625 getCharAndSizeSlow(Ptr, Size, &Tok);
633 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
636 if (isObviouslySimpleCharacter(Ptr[0])) {
642 return getCharAndSizeSlow(Ptr, Size);
647 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
648 Token *Tok =
nullptr);
653 static unsigned getEscapedNewLineSize(
const char *
P);
658 static const char *SkipEscapedNewLines(
const char *P);
662 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
668 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
670 void PropagateLineStartLeadingSpaceInfo(
Token &Result);
672 const char *LexUDSuffix(
Token &Result,
const char *CurPtr,
673 bool IsStringLiteral);
676 bool LexIdentifier (
Token &Result,
const char *CurPtr);
677 bool LexNumericConstant (
Token &Result,
const char *CurPtr);
678 bool LexStringLiteral (
Token &Result,
const char *CurPtr,
680 bool LexRawStringLiteral (
Token &Result,
const char *CurPtr,
682 bool LexAngledStringLiteral(
Token &Result,
const char *CurPtr);
683 bool LexCharConstant (
Token &Result,
const char *CurPtr,
685 bool LexEndOfFile (
Token &Result,
const char *CurPtr);
686 bool SkipWhitespace (
Token &Result,
const char *CurPtr,
687 bool &TokAtPhysicalStartOfLine);
688 bool SkipLineComment (
Token &Result,
const char *CurPtr,
689 bool &TokAtPhysicalStartOfLine);
690 bool SkipBlockComment (
Token &Result,
const char *CurPtr,
691 bool &TokAtPhysicalStartOfLine);
692 bool SaveLineComment (
Token &Result,
const char *CurPtr);
694 bool IsStartOfConflictMarker(
const char *CurPtr);
695 bool HandleEndOfConflictMarker(
const char *CurPtr);
697 bool lexEditorPlaceholder(
Token &Result,
const char *CurPtr);
699 bool isCodeCompletionPoint(
const char *CurPtr)
const;
/// cutOffLexing - Set BufferPtr equal to BufferEnd.
/// NOTE(review): presumably this makes the lexer treat the buffer as
/// exhausted so no further tokens are produced - confirm against the users
/// of BufferPtr in the implementation file.
700 void cutOffLexing() { BufferPtr = BufferEnd; }
702 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
717 uint32_t tryReadUCN(
const char *&CurPtr,
const char *SlashLoc,
Token *Tok);
730 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
738 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
743 #endif // LLVM_CLANG_LEX_LEXER_H
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
const char * getBufferLocation() const
Return the current location in the buffer.
DominatorTree GraphTraits specialization so the DominatorTree can be iterable by generic graph iterat...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object...
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
StringRef getBuffer() const
Gets source code buffer.
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
A little helper class used to produce diagnostics.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
Defines the clang::LangOptions interface.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
SourceLocation getEnd() const
Encodes a location in the source.
void setLength(unsigned Len)
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
SourceRange getAsRange() const
Dataflow Directional Tag Classes.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
unsigned Size
Size of the preamble in bytes.
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
Not within a conflict marker.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
void setLocation(SourceLocation L)
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
Defines the PreprocessorLexer interface.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.