31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringSwitch.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/Compiler.h" 37 #include "llvm/Support/ConvertUTF.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/MemoryBuffer.h" 40 #include "llvm/Support/NativeFormatting.h" 41 #include "llvm/Support/UnicodeCharRanges.h" 51 using namespace clang;
62 return II->getObjCKeywordID() == objcKey;
69 return tok::objc_not_keyword;
/// Out-of-line definition of Lexer::anchor(); the body is empty.
/// NOTE(review): presumably exists only to give the class a home translation
/// unit for its vtable -- confirm against the class declaration.
78 void Lexer::anchor() {}
/// Set up this lexer's state for a buffer: record the buffer start, check
/// that the buffer is NUL-terminated, step over a leading UTF-8 byte order
/// mark when lexing begins at the very start of the buffer, and reset the
/// per-lexer mode and position flags.
80 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
82 BufferStart = BufStart;
// The terminating NUL at BufEnd is relied upon by the lexing code.
86 assert(BufEnd[0] == 0 &&
87 "We assume that the input buffer has a null character at the end" 88 " to simplify lexing!");
// Only look for a byte order mark when positioned at the start of the buffer.
93 if (BufferStart == BufferPtr) {
95 StringRef Buf(BufferStart, BufferEnd - BufferStart);
// "\xEF\xBB\xBF" is the 3-byte UTF-8 encoding of the BOM.
96 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
97 .StartsWith(
"\xEF\xBB\xBF", 3)
// Step past the BOM bytes.
101 BufferPtr += BOMLength;
// A freshly initialized lexer is not a _Pragma lexer and has seen no
// conflict marker.
104 Is_PragmaLexer =
false;
105 CurrentConflictMarkerState =
CMK_None;
// Lexing starts at the beginning of a line, both logically and physically.
108 IsAtStartOfLine =
true;
109 IsAtPhysicalStartOfLine =
true;
// No leading whitespace or empty macro has been seen yet.
111 HasLeadingSpace =
false;
112 HasLeadingEmptyMacro =
false;
// Start outside any preprocessor directive and outside filename parsing.
115 ParsingPreprocessorDirective =
false;
118 ParsingFilename =
false;
// Raw mode and the extended token mode are both off by default.
124 LexingRawMode =
false;
127 ExtendedTokenMode = 0;
136 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
137 LangOpts(PP.getLangOpts()) {
138 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
139 InputFile->getBufferEnd());
148 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
149 : FileLoc(fileloc), LangOpts(langOpts) {
150 InitLexer(BufStart, BufPtr, BufEnd);
161 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
162 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
165 assert(
PP &&
"Cannot reset token mode without a preprocessor");
166 if (LangOpts.TraditionalCPP)
195 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
196 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
203 L->BufferPtr = StrData;
204 L->BufferEnd = StrData+TokLen;
205 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
211 ExpansionLocEnd, TokLen);
218 L->Is_PragmaLexer =
true;
223 typename T::size_type i = 0, e = Str.size();
225 if (Str[i] ==
'\\' || Str[i] == Quote) {
226 Str.insert(Str.begin() + i,
'\\');
229 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
231 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
232 Str[i] != Str[i + 1]) {
238 Str.insert(Str.begin() + i + 1,
'n');
249 char Quote = Charify ?
'\'' :
'"';
264 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
267 const char *BufEnd = BufPtr + Tok.
getLength();
271 while (BufPtr < BufEnd) {
276 if (Spelling[Length - 1] ==
'"')
284 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
287 const char *RawEnd = BufEnd;
288 do --RawEnd;
while (*RawEnd !=
'"');
289 size_t RawLength = RawEnd - BufPtr + 1;
292 memcpy(Spelling + Length, BufPtr, RawLength);
300 while (BufPtr < BufEnd) {
307 "NeedsCleaning flag set on token that didn't need cleaning!");
325 bool invalidTemp =
false;
326 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
328 if (invalid) *invalid =
true;
332 const char *tokenBegin = file.data() + locInfo.second;
336 file.begin(), tokenBegin, file.end());
338 lexer.LexFromRawLexer(token);
340 unsigned length = token.getLength();
343 if (!token.needsCleaning())
344 return StringRef(tokenBegin, length);
347 buffer.resize(length);
348 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
349 return StringRef(buffer.data(), buffer.size());
359 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
361 bool CharDataInvalid =
false;
365 *Invalid = CharDataInvalid;
371 return std::string(TokStart, TokStart + Tok.
getLength());
375 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
392 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
394 const char *TokStart =
nullptr;
396 if (Tok.
is(tok::raw_identifier))
401 Buffer = II->getNameStart();
402 return II->getLength();
412 bool CharDataInvalid =
false;
415 *Invalid = CharDataInvalid;
416 if (CharDataInvalid) {
429 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
450 bool IgnoreWhiteSpace) {
461 bool Invalid =
false;
462 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
466 const char *StrData = Buffer.data()+LocInfo.second;
473 Buffer.begin(), StrData, Buffer.end());
475 TheLexer.LexFromRawLexer(Result);
482 const char *BufStart = Buffer.data();
483 if (Offset >= Buffer.size())
486 const char *LexStart = BufStart +
Offset;
487 for (; LexStart != BufStart; --LexStart) {
503 if (LocInfo.first.isInvalid())
506 bool Invalid =
false;
507 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
513 const char *StrData = Buffer.data() + LocInfo.second;
515 if (!LexStart || LexStart == StrData)
520 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
527 TheLexer.LexFromRawLexer(TheTok);
529 if (TheLexer.getBufferLocation() > StrData) {
533 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
558 std::pair<FileID, unsigned> BeginFileLocInfo =
560 assert(FileLocInfo.first == BeginFileLocInfo.first &&
561 FileLocInfo.second >= BeginFileLocInfo.second);
580 const unsigned StartOffset = 1;
582 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
586 bool InPreprocessorDirective =
false;
590 unsigned MaxLineOffset = 0;
592 const char *CurPtr = Buffer.begin();
593 unsigned CurLine = 0;
594 while (CurPtr != Buffer.end()) {
598 if (CurLine == MaxLines)
602 if (CurPtr != Buffer.end())
603 MaxLineOffset = CurPtr - Buffer.begin();
607 TheLexer.LexFromRawLexer(TheTok);
609 if (InPreprocessorDirective) {
622 InPreprocessorDirective =
false;
631 if (MaxLineOffset && TokOffset >= MaxLineOffset)
636 if (TheTok.
getKind() == tok::comment) {
644 Token HashTok = TheTok;
645 InPreprocessorDirective =
true;
651 TheLexer.LexFromRawLexer(TheTok);
655 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
656 .Case(
"include", PDK_Skipped)
657 .Case(
"__include_macros", PDK_Skipped)
658 .Case(
"define", PDK_Skipped)
659 .Case(
"undef", PDK_Skipped)
660 .Case(
"line", PDK_Skipped)
661 .Case(
"error", PDK_Skipped)
662 .Case(
"pragma", PDK_Skipped)
663 .Case(
"import", PDK_Skipped)
664 .Case(
"include_next", PDK_Skipped)
665 .Case(
"warning", PDK_Skipped)
666 .Case(
"ident", PDK_Skipped)
667 .Case(
"sccs", PDK_Skipped)
668 .Case(
"assert", PDK_Skipped)
669 .Case(
"unassert", PDK_Skipped)
670 .Case(
"if", PDK_Skipped)
671 .Case(
"ifdef", PDK_Skipped)
672 .Case(
"ifndef", PDK_Skipped)
673 .Case(
"elif", PDK_Skipped)
674 .Case(
"else", PDK_Skipped)
675 .Case(
"endif", PDK_Skipped)
676 .Default(PDK_Unknown);
691 InPreprocessorDirective =
false;
702 if (ActiveCommentLoc.
isValid())
703 End = ActiveCommentLoc;
720 bool Invalid =
false;
724 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
727 unsigned PhysOffset = 0;
732 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
742 for (; CharNo; --CharNo) {
753 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
754 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
806 if (expansionLoc.isFileID()) {
809 *MacroBegin = expansionLoc;
837 *MacroEnd = expansionLoc;
911 bool Invalid =
false;
941 if (Invalid) *Invalid =
true;
947 if (beginInfo.first.isInvalid()) {
948 if (Invalid) *Invalid =
true;
954 beginInfo.second > EndOffs) {
955 if (Invalid) *Invalid =
true;
960 bool invalidTemp =
false;
961 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
963 if (Invalid) *Invalid =
true;
967 if (Invalid) *Invalid =
false;
968 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
974 assert(Loc.
isMacroID() &&
"Only reasonble to call this on macros");
1014 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1015 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1020 assert(Loc.
isMacroID() &&
"Only reasonble to call this on macros");
1039 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1040 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1049 if (Str - 1 < BufferStart)
1052 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1053 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1054 if (Str - 2 < BufferStart)
1064 return *Str ==
'\\';
1072 if (LocInfo.first.isInvalid())
1074 bool Invalid =
false;
1075 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
1081 StringRef Rest = Buffer.substr(Line - Buffer.data());
1082 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1083 return NumWhitespaceChars == StringRef::npos
1085 : Rest.take_front(NumWhitespaceChars);
1100 unsigned CharNo,
unsigned TokLen) {
1101 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1115 std::pair<SourceLocation,SourceLocation> II =
1116 SM.getImmediateExpansionRange(FileLoc);
1118 return SM.createExpansionLoc(SpellingLoc, II.first, II.second, TokLen);
1124 unsigned TokLen)
const {
1125 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1126 "Location out of range for this buffer!");
1130 unsigned CharNo = Loc-BufferStart;
1136 assert(PP &&
"This doesn't work on raw lexers");
1155 case '=':
return '#';
1156 case ')':
return ']';
1157 case '(':
return '[';
1158 case '!':
return '|';
1159 case '\'':
return '^';
1160 case '>':
return '}';
1161 case '/':
return '\\';
1162 case '<':
return '{';
1163 case '-':
return '~';
1173 if (!Res || !L)
return Res;
1177 L->
Diag(CP-2, diag::trigraph_ignored);
1182 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
/// Measure the newline that follows an escape character at Ptr.
/// Callers in this file treat a result of 0 as "no escaped newline here".
1189 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
// Test whether the character at Ptr[Size-1] is neither '\n' nor '\r'.
1194 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
// Detect a two-character line ending: a second newline character that
// differs from the first one (i.e. "\r\n" or "\n\r").
1198 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1199 Ptr[Size-1] != Ptr[Size])
/// Advance P over escaped newlines and return the resulting pointer.
/// P is returned unchanged as soon as the text after the escape is not a
/// newline (getEscapedNewLineSize reports 0).
1212 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
// Will point just past the escape character (or escape trigraph).
1214 const char *AfterEscape;
1217 }
// The escape may also start with '?': check for the three characters "??/".
else if (*P ==
'?') {
1219 if (P[1] !=
'?' || P[2] !=
'/')
// Measure the newline that follows the escape; 0 means there is none.
1228 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1229 if (NewLineSize == 0)
return P;
// Continue from the first character after the escaped newline.
1230 P = AfterEscape+NewLineSize;
1247 bool InvalidTemp =
false;
1248 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1252 const char *TokenBegin = File.data() + LocInfo.second;
1256 TokenBegin, File.end());
1259 lexer.LexFromRawLexer(Tok);
1269 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1271 if (!Tok || Tok->isNot(TKind))
1276 unsigned NumWhitespaceChars = 0;
1277 if (SkipTrailingWhitespaceAndNewLine) {
1279 unsigned char C = *TokenEnd;
1282 NumWhitespaceChars++;
1286 if (C ==
'\n' || C ==
'\r') {
1289 NumWhitespaceChars++;
1290 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1291 NumWhitespaceChars++;
/// Slow path for reading one character at Ptr, accumulating the number of
/// source bytes it occupies into Size. Handles a backslash followed by a
/// newline and sequences that start with "??".
1313 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
// A backslash may introduce an escaped newline.
1316 if (Ptr[0] ==
'\\') {
// A non-zero size means an escaped newline really follows.
1325 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1331 Diag(Ptr, diag::backslash_newline_space);
// Account for the skipped newline bytes and move past them.
1334 Size += EscapedNewLineSize;
1335 Ptr += EscapedNewLineSize;
// Read the character after the escaped newline with the same logic.
1338 return getCharAndSizeSlow(Ptr, Size, Tok);
// Two question marks may begin a trigraph.
1346 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
// If the resulting character is a backslash, handle it as one by jumping
// to the Slash label.
1355 if (
C ==
'\\')
goto Slash;
/// Counterpart of getCharAndSizeSlow that takes LangOpts explicitly; no
/// Diag call appears in the visible body. Accumulates the number of source
/// bytes consumed into Size.
1371 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
// A backslash may introduce an escaped newline.
1374 if (Ptr[0] ==
'\\') {
// A non-zero size means an escaped newline really follows.
1382 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
// Account for the skipped newline bytes and move past them.
1384 Size += EscapedNewLineSize;
1385 Ptr += EscapedNewLineSize;
// Read the character after the escaped newline with the same logic.
1388 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
// Sequences starting with "??" are only examined when LangOpts.Trigraphs
// is set.
1396 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
// If the resulting character is a backslash, handle it as one by jumping
// to the Slash label.
1402 if (
C ==
'\\')
goto Slash;
/// Reposition the lexer at BufferStart + Offset, clamped to BufferEnd, and
/// set both start-of-line flags to StartOfLine.
///
/// \param Offset       Byte offset from the start of the buffer.
/// \param StartOfLine  Value stored into IsAtStartOfLine and
///                     IsAtPhysicalStartOfLine.
1417 void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1418 BufferPtr = BufferStart +
Offset;
// Never leave BufferPtr past the end of the buffer.
1419 if (BufferPtr > BufferEnd)
1420 BufferPtr = BufferEnd;
// The logical and the physical start-of-line flags receive the same value.
1424 IsAtStartOfLine = StartOfLine;
1425 IsAtPhysicalStartOfLine = StartOfLine;
1429 if (LangOpts.AsmPreprocessor) {
1431 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1432 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1434 return C11AllowedIDChars.contains(C);
1435 }
else if (LangOpts.CPlusPlus) {
1436 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1438 return CXX03AllowedIDChars.contains(C);
1440 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1442 return C99AllowedIDChars.contains(C);
1448 if (LangOpts.AsmPreprocessor) {
1450 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1451 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1453 return !C11DisallowedInitialIDChars.contains(C);
1454 }
else if (LangOpts.CPlusPlus) {
1457 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1459 return !C99DisallowedInitialIDChars.contains(C);
1474 CannotAppearInIdentifier = 0,
1475 CannotStartIdentifier
1478 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1480 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1482 if (!C99AllowedIDChars.contains(C)) {
1485 << CannotAppearInIdentifier;
1486 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1489 << CannotStartIdentifier;
1495 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1497 if (!CXX03AllowedIDChars.contains(C)) {
1498 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
1511 struct HomoglyphPair {
1514 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1516 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1559 std::lower_bound(std::begin(SortedHomoglyphs),
1560 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1561 if (Homoglyph->Character == C) {
1564 llvm::raw_svector_ostream CharOS(CharBuf);
1565 llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
1567 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1569 << Range << CharBuf << LooksLikeStr;
/// Try to read a universal character name starting at CurPtr (whose first
/// character occupies Size bytes) and, per the visible code, move CurPtr
/// forward to the end of the UCN.
1573 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
// UCNPtr starts just past the first character and is advanced by tryReadUCN.
1575 const char *UCNPtr = CurPtr + Size;
// Passing nullptr as the last argument: no token is supplied to tryReadUCN.
1576 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
// Check for the plain spellings: 6 bytes with 'u' as the second character,
// or 10 bytes with 'U' as the second character.
1586 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1587 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
// Step CurPtr forward character by character until it reaches UCNPtr.
1590 while (CurPtr != UCNPtr)
1591 (void)getAndAdvanceChar(CurPtr, Result);
/// Try to decode one UTF-8 encoded character at CurPtr; the visible code
/// moves CurPtr to the end of the decoded sequence.
1595 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
// Decode through a scratch pointer so CurPtr is only updated explicitly.
1596 const char *UnicodePtr = CurPtr;
1597 llvm::UTF32 CodePoint;
// Decode a single UTF-8 sequence, bounded by BufferEnd, in strict mode.
1598 llvm::ConversionResult Result =
1599 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&UnicodePtr,
1600 (
const llvm::UTF8 *)BufferEnd,
1602 llvm::strictConversion);
// Anything other than a successful conversion is checked for here.
1603 if (Result != llvm::conversionOK ||
// Commit: continue lexing after the decoded character.
1615 CurPtr = UnicodePtr;
1619 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1622 unsigned char C = *CurPtr++;
1633 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1634 (C !=
'$' || !LangOpts.DollarIdents)) {
1636 const char *IdStart = BufferPtr;
1637 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1654 if (II->
getTokenID() == tok::identifier && isCodeCompletionPoint(CurPtr)
1658 Result.
setKind(tok::code_completion);
1667 C = getCharAndSize(CurPtr, Size);
1671 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1675 Diag(CurPtr, diag::ext_dollar_in_identifier);
1676 CurPtr = ConsumeChar(CurPtr, Size, Result);
1677 C = getCharAndSize(CurPtr, Size);
1679 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1680 C = getCharAndSize(CurPtr, Size);
1682 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1683 C = getCharAndSize(CurPtr, Size);
1686 goto FinishIdentifier;
1690 CurPtr = ConsumeChar(CurPtr, Size, Result);
1692 C = getCharAndSize(CurPtr, Size);
1694 CurPtr = ConsumeChar(CurPtr, Size, Result);
1695 C = getCharAndSize(CurPtr, Size);
/// Report whether the text at Start looks like a hexadecimal literal.
/// The visible result is true exactly when C2 is 'x' or 'X'.
/// NOTE(review): presumably C2 is the character following a leading '0' --
/// confirm against the lines that compute it.
1702 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1708 return (C2 ==
'x' || C2 ==
'X');
1714 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1716 char C = getCharAndSize(CurPtr, Size);
1719 CurPtr = ConsumeChar(CurPtr, Size, Result);
1721 C = getCharAndSize(CurPtr, Size);
1725 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1728 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1729 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1733 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1737 bool IsHexFloat =
true;
1738 if (!LangOpts.C99) {
1739 if (!isHexaLiteral(BufferPtr, LangOpts))
1742 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1746 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1755 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1756 CurPtr = ConsumeChar(CurPtr, Size, Result);
1757 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1758 return LexNumericConstant(Result, CurPtr);
1763 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1764 return LexNumericConstant(Result, CurPtr);
1765 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1766 return LexNumericConstant(Result, CurPtr);
1769 const char *TokStart = BufferPtr;
1770 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
1777 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1778 bool IsStringLiteral) {
1783 char C = getCharAndSize(CurPtr, Size);
1784 bool Consumed =
false;
1787 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1789 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1798 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1799 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1810 bool IsUDSuffix =
false;
1817 const unsigned MaxStandardSuffixLength = 3;
1818 char Buffer[MaxStandardSuffixLength] = { C };
1819 unsigned Consumed = Size;
1827 const StringRef CompleteSuffix(Buffer, Chars);
1833 if (Chars == MaxStandardSuffixLength)
1837 Buffer[Chars++] = Next;
1838 Consumed += NextSize;
1845 ? diag::ext_ms_reserved_user_defined_literal
1846 : diag::ext_reserved_user_defined_literal)
1851 CurPtr = ConsumeChar(CurPtr, Size, Result);
1856 C = getCharAndSize(CurPtr, Size);
1858 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1859 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
/// Lex the remainder of a string literal of the given Kind, starting at
/// CurPtr, and form the resulting token in Result.
1868 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
// Position of a NUL character found inside the literal, if any.
1871 const char *NulCharacter =
nullptr;
1874 (Kind == tok::utf8_string_literal ||
1875 Kind == tok::utf16_string_literal ||
1876 Kind == tok::utf32_string_literal))
1878 ? diag::warn_cxx98_compat_unicode_literal
1879 : diag::warn_c99_compat_unicode_literal);
1881 char C = getAndAdvanceChar(CurPtr, Result);
1886 C = getAndAdvanceChar(CurPtr, Result);
// A newline, or a NUL sitting exactly at BufferEnd, is checked for here.
1888 if (C ==
'\n' || C ==
'\r' ||
1889 (C == 0 && CurPtr-1 == BufferEnd)) {
// NOTE(review): the streamed 1 presumably selects the "string" wording of
// the diagnostic (LexCharConstant streams 0) -- confirm.
1891 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
// The token ends before the offending character and is tok::unknown.
1892 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1897 if (isCodeCompletionPoint(CurPtr-1)) {
1899 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
// Remember where the NUL was; it is used as a diagnostic location below.
1904 NulCharacter = CurPtr-1;
1906 C = getAndAdvanceChar(CurPtr, Result);
// Lex a user-defined-literal suffix; true marks this as a string literal.
1911 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1915 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
// Form the token of the requested Kind ending at CurPtr.
1918 const char *TokStart = BufferPtr;
1919 FormTokenWithChars(Result, CurPtr, Kind);
1926 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1934 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1936 unsigned PrefixLen = 0;
1942 if (CurPtr[PrefixLen] !=
'(') {
1944 const char *PrefixEnd = &CurPtr[PrefixLen];
1945 if (PrefixLen == 16) {
1946 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1948 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1949 << StringRef(PrefixEnd, 1);
1961 if (C == 0 && CurPtr-1 == BufferEnd) {
1967 FormTokenWithChars(Result, CurPtr, tok::unknown);
1972 const char *Prefix = CurPtr;
1973 CurPtr += PrefixLen + 1;
1980 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
1981 CurPtr += PrefixLen + 1;
1984 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
1986 Diag(BufferPtr, diag::err_unterminated_raw_string)
1987 << StringRef(Prefix, PrefixLen);
1988 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1995 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1998 const char *TokStart = BufferPtr;
1999 FormTokenWithChars(Result, CurPtr, Kind);
/// Lex an angle-bracketed string (tok::angle_string_literal) starting at
/// CurPtr. On a newline or terminating NUL, the visible code forms a
/// tok::less token ending at AfterLessPos.
2006 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
// Position of a NUL character found inside the literal, if any.
2008 const char *NulCharacter =
nullptr;
// Remember the starting position so a tok::less token can end there.
2009 const char *AfterLessPos = CurPtr;
2010 char C = getAndAdvanceChar(CurPtr, Result);
2015 C = getAndAdvanceChar(CurPtr, Result);
// Stop on a newline, or on a NUL that is either at BufferEnd or at the
// code-completion point.
2017 if (C ==
'\n' || C ==
'\r' ||
2018 (C == 0 && (CurPtr-1 == BufferEnd ||
2019 isCodeCompletionPoint(CurPtr-1)))) {
// Form a tok::less token that ends at the remembered position.
2022 FormTokenWithChars(Result, AfterLessPos, tok::less);
// Remember where the NUL was; it is used as a diagnostic location below.
2027 NulCharacter = CurPtr-1;
2029 C = getAndAdvanceChar(CurPtr, Result);
2034 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
// Form the angle string literal token ending at CurPtr.
2037 const char *TokStart = BufferPtr;
2038 FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
/// Lex the remainder of a character constant of the given Kind, starting at
/// CurPtr, and form the resulting token in Result.
2045 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
// Position of a NUL character found inside the constant, if any.
2048 const char *NulCharacter =
nullptr;
// Compatibility diagnostics depend on which character-constant kind this is.
2051 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2053 ? diag::warn_cxx98_compat_unicode_literal
2054 : diag::warn_c99_compat_unicode_literal);
2055 else if (Kind == tok::utf8_char_constant)
2056 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2059 char C = getAndAdvanceChar(CurPtr, Result);
// Empty character constant: diagnose and form tok::unknown.
2062 Diag(BufferPtr, diag::ext_empty_character);
2063 FormTokenWithChars(Result, CurPtr, tok::unknown);
2070 C = getAndAdvanceChar(CurPtr, Result);
// A newline, or a NUL sitting exactly at BufferEnd, is checked for here.
2072 if (C ==
'\n' || C ==
'\r' ||
2073 (C == 0 && CurPtr-1 == BufferEnd)) {
// NOTE(review): the streamed 0 presumably selects the "character" wording
// of the diagnostic (LexStringLiteral streams 1) -- confirm.
2075 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
// The token ends before the offending character and is tok::unknown.
2076 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2081 if (isCodeCompletionPoint(CurPtr-1)) {
2083 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
// Remember where the NUL was; it is used as a diagnostic location below.
2088 NulCharacter = CurPtr-1;
2090 C = getAndAdvanceChar(CurPtr, Result);
// Lex a user-defined-literal suffix; false marks this as not a string.
2095 CurPtr = LexUDSuffix(Result, CurPtr,
false);
2099 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
// Form the token of the requested Kind ending at CurPtr.
2102 const char *TokStart = BufferPtr;
2103 FormTokenWithChars(Result, CurPtr, Kind);
2112 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
2113 bool &TokAtPhysicalStartOfLine) {
2117 unsigned char Char = *CurPtr;
2142 FormTokenWithChars(Result, CurPtr, tok::unknown);
2144 IsAtStartOfLine =
true;
2145 IsAtPhysicalStartOfLine =
true;
2152 char PrevChar = CurPtr[-1];
2158 TokAtPhysicalStartOfLine =
true;
2171 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2172 bool &TokAtPhysicalStartOfLine) {
2176 Diag(BufferPtr, diag::ext_line_comment);
2180 LangOpts.LineComment =
true;
2194 C !=
'\n' && C !=
'\r')
2197 const char *NextLine = CurPtr;
2200 const char *EscapePtr = CurPtr-1;
2201 bool HasSpace =
false;
2207 if (*EscapePtr ==
'\\')
2210 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2211 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2213 CurPtr = EscapePtr-2;
2219 Diag(EscapePtr, diag::backslash_newline_space);
2226 const char *OldPtr = CurPtr;
2229 C = getAndAdvanceChar(CurPtr, Result);
2234 if (C != 0 && CurPtr == OldPtr+1) {
2242 if (CurPtr != OldPtr + 1 && C !=
'/' &&
2243 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2244 for (; OldPtr != CurPtr; ++OldPtr)
2245 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2249 const char *ForwardPtr = CurPtr;
2252 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2257 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2262 if (C ==
'\r' || C ==
'\n' || CurPtr == BufferEnd + 1) {
2267 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2285 return SaveLineComment(Result, CurPtr);
2303 TokAtPhysicalStartOfLine =
true;
2312 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2315 FormTokenWithChars(Result, CurPtr, tok::comment);
2322 bool Invalid =
false;
2323 std::string Spelling = PP->
getSpelling(Result, &Invalid);
2327 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2342 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2348 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2350 if (CurPtr[0] == CurPtr[1])
2358 bool HasSpace =
false;
2365 if (*CurPtr ==
'\\') {
2366 if (CurPtr[-1] !=
'*')
return false;
2369 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2380 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2384 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2389 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2393 L->
Diag(CurPtr, diag::backslash_newline_space);
2414 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2415 bool &TokAtPhysicalStartOfLine) {
2425 unsigned char C = getCharAndSize(CurPtr, CharSize);
2427 if (C == 0 && CurPtr == BufferEnd+1) {
2429 Diag(BufferPtr, diag::err_unterminated_block_comment);
2435 FormTokenWithChars(Result, CurPtr, tok::unknown);
2451 if (CurPtr + 24 < BufferEnd &&
2456 while (C !=
'/' && ((
intptr_t)CurPtr & 0x0F) != 0)
2459 if (C ==
'/')
goto FoundSlash;
2463 while (CurPtr+16 <= BufferEnd) {
2470 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2476 __vector
unsigned char Slashes = {
2477 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2478 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/' 2480 while (CurPtr+16 <= BufferEnd &&
2481 !
vec_any_eq(*(
const vector
unsigned char*)CurPtr, Slashes))
2485 while (CurPtr[0] !=
'/' &&
2489 CurPtr+4 < BufferEnd) {
2499 while (C !=
'/' && C !=
'\0')
2504 if (CurPtr[-2] ==
'*')
2507 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2514 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2519 Diag(CurPtr-1, diag::warn_nested_block_comment);
2521 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2523 Diag(BufferPtr, diag::err_unterminated_block_comment);
2532 FormTokenWithChars(Result, CurPtr, tok::unknown);
2538 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2557 FormTokenWithChars(Result, CurPtr, tok::comment);
2566 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2584 "Must be in a preprocessing directive!");
2588 const char *CurPtr = BufferPtr;
2590 char Char = getAndAdvanceChar(CurPtr, Tmp);
2594 Result->push_back(Char);
2598 if (CurPtr-1 != BufferEnd) {
2599 if (isCodeCompletionPoint(CurPtr-1)) {
2607 Result->push_back(Char);
2615 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2616 BufferPtr = CurPtr-1;
2620 if (Tmp.
is(tok::code_completion)) {
2625 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
/// Handle reaching the end of the buffer at CurPtr. The visible code can
/// form a tok::eod or a tok::eof token and selects a diagnostic for a file
/// that does not end in a newline.
2637 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
// Form an end-of-directive token.
2645 FormTokenWithChars(Result, CurPtr, tok::eod);
// Pin the lexer at the end of the buffer and form the EOF token there.
2657 BufferPtr = BufferEnd;
2658 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2673 diag::err_pp_unterminated_conditional);
// Non-empty input whose last character is neither '\n' nor '\r'.
2679 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
// Choose the diagnostic ID according to the language mode.
2684 if (LangOpts.CPlusPlus11) {
// Use the C++98-compat warning when it is not ignored at EndLoc.
2688 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2689 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2691 DiagID = diag::warn_no_newline_eof;
2694 DiagID = diag::ext_no_newline_eof;
// Report the chosen diagnostic at the end of the buffer.
2697 Diag(BufferEnd, DiagID)
/// Report whether the next token is a left parenthesis. The lexer position
/// and its line/space flags are saved first and restored before returning.
2711 unsigned Lexer::isNextPPTokenLParen() {
2712 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
// Save the state that looking ahead would otherwise disturb.
2720 const char *TmpBufferPtr = BufferPtr;
2722 bool atStartOfLine = IsAtStartOfLine;
2723 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2724 bool leadingSpace = HasLeadingSpace;
// Restore the saved position and flags.
2730 BufferPtr = TmpBufferPtr;
2732 HasLeadingSpace = leadingSpace;
2733 IsAtStartOfLine = atStartOfLine;
2734 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
// The answer depends only on the kind of the token that was looked at.
2741 return Tok.
is(tok::l_paren);
2747 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2749 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
2750 size_t Pos = RestOfBuffer.find(Terminator);
2751 while (Pos != StringRef::npos) {
2754 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2755 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2756 Pos = RestOfBuffer.find(Terminator);
2759 return RestOfBuffer.data()+Pos;
/// Check whether CurPtr is at the start of a version-control conflict
/// marker; when one is recognized, the visible code diagnoses it, records
/// its Kind in CurrentConflictMarkerState and scans to the end of the line.
2768 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
// The marker must be at the start of the buffer or follow a newline.
2770 if (CurPtr != BufferStart &&
2771 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
// Accept only text beginning with "<<<<<<<" or with ">>>> ".
2775 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
2776 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
// Report the marker and remember which kind is currently open.
2791 Diag(CurPtr, diag::err_conflict_marker);
2792 CurrentConflictMarkerState =
Kind;
// Scan to the end of the marker line; reaching BufferEnd first is asserted
// not to happen.
2796 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2797 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
/// Handle a possible separator/terminator line of the conflict marker that
/// is currently open; the visible code scans to the end of that line and
/// resets CurrentConflictMarkerState to CMK_None.
2812 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
// The marker must be at the start of the buffer or follow a newline.
2814 if (CurPtr != BufferStart &&
2815 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
// Require the first four characters to be identical.
2824 for (
unsigned i = 1; i != 4; ++i)
2825 if (CurPtr[i] != CurPtr[0])
2832 CurrentConflictMarkerState)) {
// Scan to the end of the line, stopping at BufferEnd if no newline is found.
2836 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
// No conflict marker is open any more.
2842 CurrentConflictMarkerState =
CMK_None;
2850 const char *BufferEnd) {
2851 if (CurPtr == BufferEnd)
2854 for (; CurPtr != BufferEnd; ++CurPtr) {
2855 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
/// Lex an editor placeholder, which begins with the two characters "<#",
/// into a tok::raw_identifier token. CurPtr points at the '#'.
2861 bool Lexer::lexEditorPlaceholder(
Token &Result,
const char *CurPtr) {
2862 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
// Start of the placeholder: the '<' just before CurPtr.
2868 const char *Start = CurPtr - 1;
// Placeholders are an error unless the language options allow them.
2869 if (!LangOpts.AllowEditorPlaceholders)
2870 Diag(Start, diag::err_placeholder_in_source);
// The whole placeholder, up to End, becomes one raw identifier token.
2872 FormTokenWithChars(Result, End, tok::raw_identifier);
2880 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
/// Try to read a universal character name whose kind character is at
/// StartPtr (SlashLoc is used as the location for the C89 warning). Reads
/// NumHexDigits hexadecimal digits into a code point and checks the result.
2889 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
// The character after the backslash selects the kind of UCN.
2892 char Kind = getCharAndSize(StartPtr, CharSize);
2894 unsigned NumHexDigits;
2897 else if (Kind ==
'U')
// Neither C++ nor C99: warn at the backslash location.
2902 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2904 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
// CurPtr walks the digits; KindLoc is the character just before CurPtr.
2908 const char *CurPtr = StartPtr + CharSize;
2909 const char *KindLoc = &CurPtr[-1];
2911 uint32_t CodePoint = 0;
// Read up to NumHexDigits hexadecimal digits.
2912 for (
unsigned i = 0; i < NumHexDigits; ++i) {
2913 char C = getCharAndSize(CurPtr, CharSize);
2915 unsigned Value = llvm::hexDigitValue(C);
2919 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2920 << StringRef(KindLoc, 1);
2922 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
// Exactly four digits were read where eight are required.
2925 if (i == 4 && NumHexDigits == 8) {
2927 Diag(KindLoc, diag::note_ucn_four_not_eight)
// Compare the bytes consumed with NumHexDigits + 2, the length of the
// plain spelling of the kind character and digits.
2944 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
// Advance StartPtr character by character until it reaches CurPtr.
2947 while (StartPtr != CurPtr)
2948 (void)getAndAdvanceChar(StartPtr, *Result);
2954 if (LangOpts.AsmPreprocessor)
// Code points below 0xA0 get special treatment.
2968 if (CodePoint < 0xA0) {
// 0x24, 0x40 and 0x60 are '$', '@' and '`' in ASCII.
2969 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
// Below 0x20 or at/above 0x7F: reported as a control character.
2975 if (CodePoint < 0x20 || CodePoint >= 0x7F)
2976 Diag(BufferPtr, diag::err_ucn_control_character);
// The diagnostic is given the character itself as a one-character string.
2978 char C =
static_cast<char>(CodePoint);
2979 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
2984 }
// 0xD800-0xDFFF is the UTF-16 surrogate range.
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
// C++ before C++11 gets the surrogate warning.
2989 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
2990 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
2992 Diag(BufferPtr, diag::err_ucn_escape_invalid);
/// Check whether the code point C is in the UnicodeWhitespaceChars set and,
/// if so, report diag::ext_unicode_whitespace.
3000 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
3001 const char *CurPtr) {
// Function-local static set, constructed once.
3002 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
3005 UnicodeWhitespaceChars.contains(
C)) {
3006 Diag(BufferPtr, diag::ext_unicode_whitespace)
3015 bool Lexer::LexUnicode(
Token &Result, uint32_t
C,
const char *CurPtr) {
3025 return LexIdentifier(Result, CurPtr);
3040 Diag(BufferPtr, diag::err_non_ascii)
3050 FormTokenWithChars(Result, CurPtr, tok::unknown);
3054 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
/// Lex the next token into Result. Consumes the pending start-of-line,
/// leading-space and leading-empty-macro state, then delegates to
/// LexTokenInternal and returns its result.
3061 bool Lexer::Lex(
Token &Result) {
// Each pending flag is cleared once it has been examined.
3066 if (IsAtStartOfLine) {
3068 IsAtStartOfLine =
false;
3071 if (HasLeadingSpace) {
3073 HasLeadingSpace =
false;
3076 if (HasLeadingEmptyMacro) {
3078 HasLeadingEmptyMacro =
false;
// Hand the physical start-of-line state to LexTokenInternal by value and
// clear the member before lexing.
3081 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3082 IsAtPhysicalStartOfLine =
false;
3085 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
// When isRawLex is set, a token must have been returned.
3087 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3088 return returnedToken;
3096 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
3103 const char *CurPtr = BufferPtr;
3106 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
3108 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
3115 FormTokenWithChars(Result, CurPtr, tok::unknown);
3124 unsigned SizeTmp, SizeTmp2;
3127 char Char = getAndAdvanceChar(CurPtr, Result);
3133 if (CurPtr-1 == BufferEnd)
3134 return LexEndOfFile(Result, CurPtr-1);
3137 if (isCodeCompletionPoint(CurPtr-1)) {
3140 FormTokenWithChars(Result, CurPtr, tok::code_completion);
3145 Diag(CurPtr-1, diag::null_in_file);
3147 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3156 if (LangOpts.MicrosoftExt) {
3158 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3159 return LexEndOfFile(Result, CurPtr-1);
3163 Kind = tok::unknown;
3167 if (CurPtr[0] ==
'\n')
3168 Char = getAndAdvanceChar(CurPtr, Result);
3182 IsAtStartOfLine =
true;
3183 IsAtPhysicalStartOfLine =
true;
3192 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3202 SkipHorizontalWhitespace:
3204 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3213 LangOpts.LineComment &&
3214 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3215 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3217 goto SkipIgnoredUnits;
3219 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3221 goto SkipIgnoredUnits;
3223 goto SkipHorizontalWhitespace;
3231 case '0':
case '1':
case '2':
case '3':
case '4':
3232 case '5':
case '6':
case '7':
case '8':
case '9':
3235 return LexNumericConstant(Result, CurPtr);
3241 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3242 Char = getCharAndSize(CurPtr, SizeTmp);
3246 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3247 tok::utf16_string_literal);
3251 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3252 tok::utf16_char_constant);
3255 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3256 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3257 return LexRawStringLiteral(Result,
3258 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3260 tok::utf16_string_literal);
3263 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3267 return LexStringLiteral(Result,
3268 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3270 tok::utf8_string_literal);
3271 if (Char2 ==
'\'' && LangOpts.CPlusPlus17)
3272 return LexCharConstant(
3273 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3275 tok::utf8_char_constant);
3277 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3279 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3282 return LexRawStringLiteral(Result,
3283 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3286 tok::utf8_string_literal);
3293 return LexIdentifier(Result, CurPtr);
3299 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3300 Char = getCharAndSize(CurPtr, SizeTmp);
3304 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3305 tok::utf32_string_literal);
3309 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3310 tok::utf32_char_constant);
3313 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3314 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3315 return LexRawStringLiteral(Result,
3316 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3318 tok::utf32_string_literal);
3322 return LexIdentifier(Result, CurPtr);
3328 if (LangOpts.CPlusPlus11) {
3329 Char = getCharAndSize(CurPtr, SizeTmp);
3332 return LexRawStringLiteral(Result,
3333 ConsumeChar(CurPtr, SizeTmp, Result),
3334 tok::string_literal);
3338 return LexIdentifier(Result, CurPtr);
3343 Char = getCharAndSize(CurPtr, SizeTmp);
3347 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3348 tok::wide_string_literal);
3351 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3352 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3353 return LexRawStringLiteral(Result,
3354 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3356 tok::wide_string_literal);
3360 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3361 tok::wide_char_constant);
3366 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3367 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3368 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3369 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3370 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3371 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3372 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3373 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3377 return LexIdentifier(Result, CurPtr);
3380 if (LangOpts.DollarIdents) {
3382 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3385 return LexIdentifier(Result, CurPtr);
3388 Kind = tok::unknown;
3395 return LexCharConstant(Result, CurPtr, tok::char_constant);
3401 return LexStringLiteral(Result, CurPtr, tok::string_literal);
3405 Kind = tok::question;
3408 Kind = tok::l_square;
3411 Kind = tok::r_square;
3414 Kind = tok::l_paren;
3417 Kind = tok::r_paren;
3420 Kind = tok::l_brace;
3423 Kind = tok::r_brace;
3426 Char = getCharAndSize(CurPtr, SizeTmp);
3427 if (Char >=
'0' && Char <=
'9') {
3431 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3432 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3433 Kind = tok::periodstar;
3435 }
else if (Char ==
'.' &&
3436 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3437 Kind = tok::ellipsis;
3438 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3445 Char = getCharAndSize(CurPtr, SizeTmp);
3448 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3449 }
else if (Char ==
'=') {
3450 Kind = tok::ampequal;
3451 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3457 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3458 Kind = tok::starequal;
3459 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3465 Char = getCharAndSize(CurPtr, SizeTmp);
3467 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3468 Kind = tok::plusplus;
3469 }
else if (Char ==
'=') {
3470 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3471 Kind = tok::plusequal;
3477 Char = getCharAndSize(CurPtr, SizeTmp);
3479 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3480 Kind = tok::minusminus;
3481 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3482 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3483 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3485 Kind = tok::arrowstar;
3486 }
else if (Char ==
'>') {
3487 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3489 }
else if (Char ==
'=') {
3490 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3491 Kind = tok::minusequal;
3500 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3501 Kind = tok::exclaimequal;
3502 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3504 Kind = tok::exclaim;
3509 Char = getCharAndSize(CurPtr, SizeTmp);
3519 bool TreatAsComment = LangOpts.LineComment &&
3520 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3521 if (!TreatAsComment)
3523 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3525 if (TreatAsComment) {
3526 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3527 TokAtPhysicalStartOfLine))
3533 goto SkipIgnoredUnits;
3538 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3539 TokAtPhysicalStartOfLine))
3548 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3549 Kind = tok::slashequal;
3555 Char = getCharAndSize(CurPtr, SizeTmp);
3557 Kind = tok::percentequal;
3558 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3559 }
else if (LangOpts.Digraphs && Char ==
'>') {
3560 Kind = tok::r_brace;
3561 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3562 }
else if (LangOpts.Digraphs && Char ==
':') {
3563 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3564 Char = getCharAndSize(CurPtr, SizeTmp);
3565 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3566 Kind = tok::hashhash;
3567 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3569 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3570 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3572 Diag(BufferPtr, diag::ext_charize_microsoft);
3579 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3580 goto HandleDirective;
3585 Kind = tok::percent;
3589 Char = getCharAndSize(CurPtr, SizeTmp);
3591 return LexAngledStringLiteral(Result, CurPtr);
3592 }
else if (Char ==
'<') {
3593 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3595 Kind = tok::lesslessequal;
3596 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3598 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3602 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3606 }
else if (LangOpts.CUDA && After ==
'<') {
3607 Kind = tok::lesslessless;
3608 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3611 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3612 Kind = tok::lessless;
3614 }
else if (Char ==
'=') {
3615 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3619 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
3620 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3622 Kind = tok::spaceship;
3628 Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship)
3633 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3634 Kind = tok::lessequal;
3635 }
else if (LangOpts.Digraphs && Char ==
':') {
3636 if (LangOpts.CPlusPlus11 &&
3637 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3644 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3645 if (After !=
':' && After !=
'>') {
3648 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3653 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3654 Kind = tok::l_square;
3655 }
else if (LangOpts.Digraphs && Char ==
'%') {
3656 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3657 Kind = tok::l_brace;
3658 }
else if (Char ==
'#' && SizeTmp == 1 &&
3659 lexEditorPlaceholder(Result, CurPtr)) {
3666 Char = getCharAndSize(CurPtr, SizeTmp);
3668 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3669 Kind = tok::greaterequal;
3670 }
else if (Char ==
'>') {
3671 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3673 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3675 Kind = tok::greatergreaterequal;
3676 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3680 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3683 }
else if (LangOpts.CUDA && After ==
'>') {
3684 Kind = tok::greatergreatergreater;
3685 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3688 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3689 Kind = tok::greatergreater;
3692 Kind = tok::greater;
3696 Char = getCharAndSize(CurPtr, SizeTmp);
3698 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3699 Kind = tok::caretequal;
3700 }
else if (LangOpts.OpenCL && Char ==
'^') {
3701 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3702 Kind = tok::caretcaret;
3708 Char = getCharAndSize(CurPtr, SizeTmp);
3710 Kind = tok::pipeequal;
3711 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3712 }
else if (Char ==
'|') {
3714 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3716 Kind = tok::pipepipe;
3717 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3723 Char = getCharAndSize(CurPtr, SizeTmp);
3724 if (LangOpts.Digraphs && Char ==
'>') {
3725 Kind = tok::r_square;
3726 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3727 }
else if ((LangOpts.CPlusPlus ||
3728 LangOpts.DoubleSquareBracketAttributes) &&
3730 Kind = tok::coloncolon;
3731 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3740 Char = getCharAndSize(CurPtr, SizeTmp);
3743 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3746 Kind = tok::equalequal;
3747 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3756 Char = getCharAndSize(CurPtr, SizeTmp);
3758 Kind = tok::hashhash;
3759 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3760 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3763 Diag(BufferPtr, diag::ext_charize_microsoft);
3764 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3770 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3771 goto HandleDirective;
3779 if (CurPtr[-1] ==
'@' && LangOpts.ObjC1)
3782 Kind = tok::unknown;
3787 if (!LangOpts.AsmPreprocessor) {
3788 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3789 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3790 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3798 return LexUnicode(Result, CodePoint, CurPtr);
3802 Kind = tok::unknown;
3807 Kind = tok::unknown;
3811 llvm::UTF32 CodePoint;
3816 const char *UTF8StartPtr = CurPtr;
3817 llvm::ConversionResult Status =
3818 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
3819 (
const llvm::UTF8 *)BufferEnd,
3821 llvm::strictConversion);
3822 if (Status == llvm::conversionOK) {
3823 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3824 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3834 return LexUnicode(Result, CodePoint, CurPtr);
3840 Kind = tok::unknown;
3847 Diag(CurPtr, diag::err_invalid_utf8);
3849 BufferPtr = CurPtr+1;
3861 FormTokenWithChars(Result, CurPtr, Kind);
3867 FormTokenWithChars(Result, CurPtr, tok::hash);
3872 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
This is a discriminated union of FileInfo and ExpansionInfo.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
SourceLocation getSpellingLoc() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
void setBegin(SourceLocation b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Defines the SourceManager interface.
LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
void setFlag(TokenFlags Flag)
Set the specified flag.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
Lexer(FileID FID, const llvm::MemoryBuffer *InputBuffer, Preprocessor &PP)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
bool hadModuleLoaderFatalFailure() const
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
Defines the MultipleIncludeOpt interface.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
tok::TokenKind getKind() const
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
One of these records is kept for each identifier that is lexed.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
SourceLocation getBegin() const
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
void setRawIdentifierData(const char *Ptr)
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the newline pointed to by Str is preceded by an escape sequence.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
tok::TokenKind getTokenID() const
If this is a source-language token (e.g.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
Defines the Diagnostic-related interfaces.
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
static SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart, unsigned Character, const SourceManager &SM, const LangOptions &LangOpts)
AdvanceToTokenCharacter - If the current SourceLocation specifies a location at the start of a token...
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
bool isMacroArgExpansion() const
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
bool ParsingFilename
True after #include; turns <xx> into a tok::angle_string_literal token.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
Defines the clang::LangOptions interface.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
const AnnotatedLine * Line
const FunctionProtoType * T
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::Preprocessor interface.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
bool getCommentRetentionState() const
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset, or null if the offset is invalid.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
The result type of a method or function.
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
const ExpansionInfo & getExpansion() const
bool isRecordingPreamble() const
static CharSourceRange getCharRange(SourceRange R)
SourceManager & getSourceManager() const
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
Encodes a location in the source.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character, check whether it's a homoglyph for a common non-identifier source character that is unlikely to be an intentional identifier character and warn if so.
std::pair< SourceLocation, SourceLocation > getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
IdentifierInfo * getIdentifierInfo() const
static Optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
void setIdentifierInfo(IdentifierInfo *II)
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getExpansionLocStart() const
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
tok::PPKeywordKind getPPKeywordID() const
Return the preprocessor keyword ID for this identifier.
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
unsigned getLength() const
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
void setLiteralData(const char *Ptr)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo membe...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
SourceLocation getEnd() const
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
Defines the clang::TokenKind enum and support functions.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
bool HandleComment(Token &Token, SourceRange Comment)
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
Defines the clang::SourceLocation class and associated facilities.
DiagnosticsEngine & getDiagnostics() const
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Not within a conflict marker.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void StringifyImpl(T &Str, char Quote)
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
void startToken()
Reset all flags to cleared.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.