13 #ifndef LLVM_CLANG_LEX_LEXER_H 14 #define LLVM_CLANG_LEX_LEXER_H 21 #include "llvm/ADT/Optional.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/StringRef.h" 36 class DiagnosticBuilder;
69 : Size(Size), PreambleEndsAtStartOfLine(PreambleEndsAtStartOfLine) {}
79 void anchor()
override;
85 const char *BufferStart;
88 const char *BufferEnd;
110 unsigned char ExtendedTokenMode;
119 const char *BufferPtr;
123 bool IsAtStartOfLine;
125 bool IsAtPhysicalStartOfLine;
127 bool HasLeadingSpace;
129 bool HasLeadingEmptyMacro;
134 void InitLexer(
const char *BufStart,
const char *BufPtr,
const char *BufEnd);
147 const char *BufStart,
const char *BufPtr,
const char *BufEnd);
152 Lexer(
FileID FID,
const llvm::MemoryBuffer *FromFile,
179 bool Lex(
Token &Result);
188 void IndirectLex(
Token &Result)
override { Lex(Result); }
195 assert(LexingRawMode &&
"Not already in raw mode!");
199 return BufferPtr == BufferEnd;
207 return ExtendedTokenMode > 1;
213 assert((!Val || LexingRawMode || LangOpts.TraditionalCPP) &&
214 "Can only retain whitespace in raw mode or -traditional-cpp");
215 ExtendedTokenMode = Val ? 2 : 0;
221 return ExtendedTokenMode > 0;
228 assert(!isKeepWhitespaceMode() &&
229 "Can't play with comment retention state when retaining whitespace");
230 ExtendedTokenMode = Mode ? 1 : 0;
239 void resetExtendedTokenMode();
243 return StringRef(BufferStart, BufferEnd - BufferStart);
257 SourceLocation getSourceLocation(
const char *Loc,
unsigned TokLen = 1)
const;
262 return getSourceLocation(BufferPtr);
270 assert(BufferPtr >= BufferStart &&
"Invalid buffer state");
271 return BufferPtr - BufferStart;
281 bool skipOver(
unsigned NumBytes);
286 static std::string Stringify(StringRef Str,
bool Charify =
false);
302 static unsigned getSpelling(
const Token &
Tok,
const char *&Buffer,
305 bool *Invalid =
nullptr);
312 static std::string getSpelling(
const Token &Tok,
315 bool *Invalid =
nullptr);
329 bool *invalid =
nullptr);
344 bool IgnoreWhiteSpace =
false);
368 getTokenPrefixLength(TokStart, Characters, SM, LangOpts));
399 : CharSourceRange::getCharRange(
464 bool *Invalid =
nullptr);
494 static StringRef getImmediateMacroNameForDiagnostics(
514 unsigned MaxLines = 0);
532 bool SkipTrailingWhitespaceAndNewLine);
535 static bool isIdentifierBodyChar(
char c,
const LangOptions &LangOpts);
539 static bool isNewLineEscaped(
const char *BufferStart,
const char *Str);
547 if (isObviouslySimpleCharacter(Ptr[0])) {
553 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
568 bool LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine);
570 bool CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
const char *CurPtr);
575 bool LexUnicode(
Token &Result, uint32_t C,
const char *CurPtr);
582 void FormTokenWithChars(
Token &Result,
const char *TokEnd,
584 unsigned TokLen = TokEnd-BufferPtr;
586 Result.
setLocation(getSourceLocation(BufferPtr, TokLen));
594 unsigned isNextPPTokenLParen();
618 static bool isObviouslySimpleCharacter(
char C) {
619 return C !=
'?' && C !=
'\\';
626 inline char getAndAdvanceChar(
const char *&Ptr,
Token &Tok) {
629 if (isObviouslySimpleCharacter(Ptr[0]))
return *Ptr++;
632 char C = getCharAndSizeSlow(Ptr, Size, &Tok);
641 const char *ConsumeChar(
const char *Ptr,
unsigned Size,
Token &Tok) {
649 getCharAndSizeSlow(Ptr, Size, &Tok);
657 inline char getCharAndSize(
const char *Ptr,
unsigned &Size) {
660 if (isObviouslySimpleCharacter(Ptr[0])) {
666 return getCharAndSizeSlow(Ptr, Size);
671 char getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
672 Token *Tok =
nullptr);
677 static unsigned getEscapedNewLineSize(
const char *
P);
682 static const char *SkipEscapedNewLines(
const char *P);
686 static char getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
692 void SetByteOffset(
unsigned Offset,
bool StartOfLine);
694 void PropagateLineStartLeadingSpaceInfo(
Token &Result);
696 const char *LexUDSuffix(
Token &Result,
const char *CurPtr,
697 bool IsStringLiteral);
700 bool LexIdentifier (
Token &Result,
const char *CurPtr);
701 bool LexNumericConstant (
Token &Result,
const char *CurPtr);
702 bool LexStringLiteral (
Token &Result,
const char *CurPtr,
704 bool LexRawStringLiteral (
Token &Result,
const char *CurPtr,
706 bool LexAngledStringLiteral(
Token &Result,
const char *CurPtr);
707 bool LexCharConstant (
Token &Result,
const char *CurPtr,
709 bool LexEndOfFile (
Token &Result,
const char *CurPtr);
710 bool SkipWhitespace (
Token &Result,
const char *CurPtr,
711 bool &TokAtPhysicalStartOfLine);
712 bool SkipLineComment (
Token &Result,
const char *CurPtr,
713 bool &TokAtPhysicalStartOfLine);
714 bool SkipBlockComment (
Token &Result,
const char *CurPtr,
715 bool &TokAtPhysicalStartOfLine);
716 bool SaveLineComment (
Token &Result,
const char *CurPtr);
718 bool IsStartOfConflictMarker(
const char *CurPtr);
719 bool HandleEndOfConflictMarker(
const char *CurPtr);
721 bool lexEditorPlaceholder(
Token &Result,
const char *CurPtr);
723 bool isCodeCompletionPoint(
const char *CurPtr)
const;
724 void cutOffLexing() { BufferPtr = BufferEnd; }
726 bool isHexaLiteral(
const char *Start,
const LangOptions &LangOpts);
728 void codeCompleteIncludedFile(
const char *PathStart,
729 const char *CompletionPoint,
bool IsAngled);
743 uint32_t tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
Token *Result);
756 bool tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
764 bool tryConsumeIdentifierUTF8Char(
const char *&CurPtr);
769 #endif // LLVM_CLANG_LEX_LEXER_H Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::PrecompiledPreambleBytes.
static DiagnosticBuilder Diag(DiagnosticsEngine *Diags, const LangOptions &Features, FullSourceLoc TokLoc, const char *TokBegin, const char *TokRangeBegin, const char *TokRangeEnd, unsigned DiagID)
Produce a diagnostic highlighting some portion of a literal.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
const char * getBufferLocation() const
Return the current location in the buffer.
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
bool LexFromRawLexer(Token &Result)
LexFromRawLexer - Lex a token from a designated raw lexer (one with no associated preprocessor object)...
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static CharSourceRange getAsCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
SourceLocation getFileLoc() const
getFileLoc - Return the File Location for the file we are lexing out of.
Keeps track of the various options that can be enabled, which control the dialect of C or C++ that is accepted.
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
StringRef getBuffer() const
Gets source code buffer.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Characters, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
bool PreambleEndsAtStartOfLine
Whether the preamble ends at the start of a new line.
A little helper class used to produce diagnostics.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
Defines the clang::LangOptions interface.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
SourceLocation getEnd() const
Encodes a location in the source.
void setLength(unsigned Len)
PreambleBounds(unsigned Size, bool PreambleEndsAtStartOfLine)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with its #include path and #line data.
SourceRange getAsRange() const
Dataflow Directional Tag Classes.
static CharSourceRange getAsCharRange(SourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Given a token range, produce a corresponding CharSourceRange that is not a token range.
unsigned getCurrentBufferOffset()
Returns the current lexing offset.
unsigned Size
Size of the preamble in bytes.
Defines the clang::TokenKind enum and support functions.
Defines the clang::SourceLocation class and associated facilities.
Not within a conflict marker.
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
void setLocation(SourceLocation L)
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
Defines the PreprocessorLexer interface.
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
SourceLocation getBegin() const
This class handles loading and caching of source files into memory.
Engages in a tight little dance with the lexer to efficiently preprocess tokens.