31 #include "llvm/ADT/None.h" 32 #include "llvm/ADT/Optional.h" 33 #include "llvm/ADT/StringExtras.h" 34 #include "llvm/ADT/StringSwitch.h" 35 #include "llvm/ADT/StringRef.h" 36 #include "llvm/Support/Compiler.h" 37 #include "llvm/Support/ConvertUTF.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/MemoryBuffer.h" 40 #include "llvm/Support/NativeFormatting.h" 41 #include "llvm/Support/UnicodeCharRanges.h" 51 using namespace clang;
62 return II->getObjCKeywordID() == objcKey;
69 return tok::objc_not_keyword;
78 void Lexer::anchor() {}
80 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
82 BufferStart = BufStart;
86 assert(BufEnd[0] == 0 &&
87 "We assume that the input buffer has a null character at the end" 88 " to simplify lexing!");
93 if (BufferStart == BufferPtr) {
95 StringRef Buf(BufferStart, BufferEnd - BufferStart);
96 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
97 .StartsWith(
"\xEF\xBB\xBF", 3)
101 BufferPtr += BOMLength;
104 Is_PragmaLexer =
false;
105 CurrentConflictMarkerState =
CMK_None;
108 IsAtStartOfLine =
true;
109 IsAtPhysicalStartOfLine =
true;
111 HasLeadingSpace =
false;
112 HasLeadingEmptyMacro =
false;
115 ParsingPreprocessorDirective =
false;
118 ParsingFilename =
false;
124 LexingRawMode =
false;
127 ExtendedTokenMode = 0;
136 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
137 LangOpts(PP.getLangOpts()) {
138 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
139 InputFile->getBufferEnd());
148 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
149 : FileLoc(fileloc), LangOpts(langOpts) {
150 InitLexer(BufStart, BufPtr, BufEnd);
161 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
162 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
165 assert(
PP &&
"Cannot reset token mode without a preprocessor");
166 if (LangOpts.TraditionalCPP)
195 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
196 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
203 L->BufferPtr = StrData;
204 L->BufferEnd = StrData+TokLen;
205 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
211 ExpansionLocEnd, TokLen);
218 L->Is_PragmaLexer =
true;
223 typename T::size_type i = 0, e = Str.size();
225 if (Str[i] ==
'\\' || Str[i] == Quote) {
226 Str.insert(Str.begin() + i,
'\\');
229 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
231 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
232 Str[i] != Str[i + 1]) {
238 Str.insert(Str.begin() + i + 1,
'n');
249 char Quote = Charify ?
'\'' :
'"';
264 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
267 const char *BufEnd = BufPtr + Tok.
getLength();
271 while (BufPtr < BufEnd) {
276 if (Spelling[Length - 1] ==
'"')
284 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
287 const char *RawEnd = BufEnd;
288 do --RawEnd;
while (*RawEnd !=
'"');
289 size_t RawLength = RawEnd - BufPtr + 1;
292 memcpy(Spelling + Length, BufPtr, RawLength);
300 while (BufPtr < BufEnd) {
307 "NeedsCleaning flag set on token that didn't need cleaning!");
325 bool invalidTemp =
false;
326 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
328 if (invalid) *invalid =
true;
332 const char *tokenBegin = file.data() + locInfo.second;
336 file.begin(), tokenBegin, file.end());
338 lexer.LexFromRawLexer(token);
340 unsigned length = token.getLength();
343 if (!token.needsCleaning())
344 return StringRef(tokenBegin, length);
347 buffer.resize(length);
348 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
349 return StringRef(buffer.data(), buffer.size());
359 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
361 bool CharDataInvalid =
false;
365 *Invalid = CharDataInvalid;
371 return std::string(TokStart, TokStart + Tok.
getLength());
375 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
392 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
394 const char *TokStart =
nullptr;
396 if (Tok.
is(tok::raw_identifier))
401 Buffer = II->getNameStart();
402 return II->getLength();
412 bool CharDataInvalid =
false;
415 *Invalid = CharDataInvalid;
416 if (CharDataInvalid) {
429 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
450 bool IgnoreWhiteSpace) {
461 bool Invalid =
false;
462 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
466 const char *StrData = Buffer.data()+LocInfo.second;
473 Buffer.begin(), StrData, Buffer.end());
475 TheLexer.LexFromRawLexer(Result);
482 const char *BufStart = Buffer.data();
483 if (Offset >= Buffer.size())
486 const char *LexStart = BufStart +
Offset;
487 for (; LexStart != BufStart; --LexStart) {
503 if (LocInfo.first.isInvalid())
506 bool Invalid =
false;
507 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
513 const char *StrData = Buffer.data() + LocInfo.second;
515 if (!LexStart || LexStart == StrData)
520 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
527 TheLexer.LexFromRawLexer(TheTok);
529 if (TheLexer.getBufferLocation() > StrData) {
533 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
558 std::pair<FileID, unsigned> BeginFileLocInfo =
560 assert(FileLocInfo.first == BeginFileLocInfo.first &&
561 FileLocInfo.second >= BeginFileLocInfo.second);
580 const unsigned StartOffset = 1;
582 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
586 bool InPreprocessorDirective =
false;
590 unsigned MaxLineOffset = 0;
592 const char *CurPtr = Buffer.begin();
593 unsigned CurLine = 0;
594 while (CurPtr != Buffer.end()) {
598 if (CurLine == MaxLines)
602 if (CurPtr != Buffer.end())
603 MaxLineOffset = CurPtr - Buffer.begin();
607 TheLexer.LexFromRawLexer(TheTok);
609 if (InPreprocessorDirective) {
622 InPreprocessorDirective =
false;
631 if (MaxLineOffset && TokOffset >= MaxLineOffset)
636 if (TheTok.
getKind() == tok::comment) {
644 Token HashTok = TheTok;
645 InPreprocessorDirective =
true;
651 TheLexer.LexFromRawLexer(TheTok);
655 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
656 .Case(
"include", PDK_Skipped)
657 .Case(
"__include_macros", PDK_Skipped)
658 .Case(
"define", PDK_Skipped)
659 .Case(
"undef", PDK_Skipped)
660 .Case(
"line", PDK_Skipped)
661 .Case(
"error", PDK_Skipped)
662 .Case(
"pragma", PDK_Skipped)
663 .Case(
"import", PDK_Skipped)
664 .Case(
"include_next", PDK_Skipped)
665 .Case(
"warning", PDK_Skipped)
666 .Case(
"ident", PDK_Skipped)
667 .Case(
"sccs", PDK_Skipped)
668 .Case(
"assert", PDK_Skipped)
669 .Case(
"unassert", PDK_Skipped)
670 .Case(
"if", PDK_Skipped)
671 .Case(
"ifdef", PDK_Skipped)
672 .Case(
"ifndef", PDK_Skipped)
673 .Case(
"elif", PDK_Skipped)
674 .Case(
"else", PDK_Skipped)
675 .Case(
"endif", PDK_Skipped)
676 .Default(PDK_Unknown);
691 InPreprocessorDirective =
false;
702 if (ActiveCommentLoc.
isValid())
703 End = ActiveCommentLoc;
717 bool Invalid =
false;
721 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
724 unsigned PhysOffset = 0;
729 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
739 for (; CharNo; --CharNo) {
750 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
751 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
803 if (expansionLoc.isFileID()) {
806 *MacroBegin = expansionLoc;
834 *MacroEnd = expansionLoc;
908 bool Invalid =
false;
938 if (Invalid) *Invalid =
true;
944 if (beginInfo.first.isInvalid()) {
945 if (Invalid) *Invalid =
true;
951 beginInfo.second > EndOffs) {
952 if (Invalid) *Invalid =
true;
957 bool invalidTemp =
false;
958 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
960 if (Invalid) *Invalid =
true;
964 if (Invalid) *Invalid =
false;
965 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
971 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1011 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1012 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1017 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1036 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1037 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1046 if (Str - 1 < BufferStart)
1049 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1050 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1051 if (Str - 2 < BufferStart)
1061 return *Str ==
'\\';
1069 if (LocInfo.first.isInvalid())
1071 bool Invalid =
false;
1072 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
1078 StringRef Rest = Buffer.substr(Line - Buffer.data());
1079 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1080 return NumWhitespaceChars == StringRef::npos
1082 : Rest.take_front(NumWhitespaceChars);
1097 unsigned CharNo,
unsigned TokLen) {
1098 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1114 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1120 unsigned TokLen)
const {
1121 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1122 "Location out of range for this buffer!");
1126 unsigned CharNo = Loc-BufferStart;
1132 assert(PP &&
"This doesn't work on raw lexers");
1151 case '=':
return '#';
1152 case ')':
return ']';
1153 case '(':
return '[';
1154 case '!':
return '|';
1155 case '\'':
return '^';
1156 case '>':
return '}';
1157 case '/':
return '\\';
1158 case '<':
return '{';
1159 case '-':
return '~';
1169 if (!Res || !L)
return Res;
1173 L->
Diag(CP-2, diag::trigraph_ignored);
1178 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1185 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1190 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1194 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1195 Ptr[Size-1] != Ptr[Size])
1208 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1210 const char *AfterEscape;
1213 }
else if (*P ==
'?') {
1215 if (P[1] !=
'?' || P[2] !=
'/')
1224 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1225 if (NewLineSize == 0)
return P;
1226 P = AfterEscape+NewLineSize;
1243 bool InvalidTemp =
false;
1244 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1248 const char *TokenBegin = File.data() + LocInfo.second;
1252 TokenBegin, File.end());
1255 lexer.LexFromRawLexer(Tok);
1265 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1267 if (!Tok || Tok->isNot(TKind))
1272 unsigned NumWhitespaceChars = 0;
1273 if (SkipTrailingWhitespaceAndNewLine) {
1275 unsigned char C = *TokenEnd;
1278 NumWhitespaceChars++;
1282 if (C ==
'\n' || C ==
'\r') {
1285 NumWhitespaceChars++;
1286 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1287 NumWhitespaceChars++;
1309 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1312 if (Ptr[0] ==
'\\') {
1321 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1327 Diag(Ptr, diag::backslash_newline_space);
1330 Size += EscapedNewLineSize;
1331 Ptr += EscapedNewLineSize;
1334 return getCharAndSizeSlow(Ptr, Size, Tok);
1342 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1351 if (
C ==
'\\')
goto Slash;
1367 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1370 if (Ptr[0] ==
'\\') {
1378 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1380 Size += EscapedNewLineSize;
1381 Ptr += EscapedNewLineSize;
1384 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1392 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1398 if (
C ==
'\\')
goto Slash;
1413 void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1414 BufferPtr = BufferStart +
Offset;
1415 if (BufferPtr > BufferEnd)
1416 BufferPtr = BufferEnd;
1420 IsAtStartOfLine = StartOfLine;
1421 IsAtPhysicalStartOfLine = StartOfLine;
1425 if (LangOpts.AsmPreprocessor) {
1427 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1428 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1430 return C11AllowedIDChars.contains(C);
1431 }
else if (LangOpts.CPlusPlus) {
1432 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1434 return CXX03AllowedIDChars.contains(C);
1436 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1438 return C99AllowedIDChars.contains(C);
1444 if (LangOpts.AsmPreprocessor) {
1446 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1447 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1449 return !C11DisallowedInitialIDChars.contains(C);
1450 }
else if (LangOpts.CPlusPlus) {
1453 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1455 return !C99DisallowedInitialIDChars.contains(C);
1470 CannotAppearInIdentifier = 0,
1471 CannotStartIdentifier
1474 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1476 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1478 if (!C99AllowedIDChars.contains(C)) {
1481 << CannotAppearInIdentifier;
1482 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1485 << CannotStartIdentifier;
1491 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1493 if (!CXX03AllowedIDChars.contains(C)) {
1494 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
1507 struct HomoglyphPair {
1510 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1512 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1565 std::lower_bound(std::begin(SortedHomoglyphs),
1566 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1567 if (Homoglyph->Character == C) {
1570 llvm::raw_svector_ostream CharOS(CharBuf);
1571 llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
1573 if (Homoglyph->LooksLike) {
1574 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1576 << Range << CharBuf << LooksLikeStr;
1578 Diags.
Report(Range.
getBegin(), diag::warn_utf8_symbol_zero_width)
1579 << Range << CharBuf;
1584 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1586 const char *UCNPtr = CurPtr + Size;
1587 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1597 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1598 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1601 while (CurPtr != UCNPtr)
1602 (void)getAndAdvanceChar(CurPtr, Result);
1606 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1607 const char *UnicodePtr = CurPtr;
1608 llvm::UTF32 CodePoint;
1609 llvm::ConversionResult Result =
1610 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&UnicodePtr,
1611 (
const llvm::UTF8 *)BufferEnd,
1613 llvm::strictConversion);
1614 if (Result != llvm::conversionOK ||
1626 CurPtr = UnicodePtr;
1630 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1633 unsigned char C = *CurPtr++;
1644 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1645 (C !=
'$' || !LangOpts.DollarIdents)) {
1647 const char *IdStart = BufferPtr;
1648 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1665 if (isCodeCompletionPoint(CurPtr)) {
1667 Result.
setKind(tok::code_completion);
1673 assert(*CurPtr == 0 &&
"Completion character must be 0");
1678 if (CurPtr < BufferEnd) {
1696 C = getCharAndSize(CurPtr, Size);
1700 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1704 Diag(CurPtr, diag::ext_dollar_in_identifier);
1705 CurPtr = ConsumeChar(CurPtr, Size, Result);
1706 C = getCharAndSize(CurPtr, Size);
1708 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1709 C = getCharAndSize(CurPtr, Size);
1711 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1712 C = getCharAndSize(CurPtr, Size);
1715 goto FinishIdentifier;
1719 CurPtr = ConsumeChar(CurPtr, Size, Result);
1721 C = getCharAndSize(CurPtr, Size);
1723 CurPtr = ConsumeChar(CurPtr, Size, Result);
1724 C = getCharAndSize(CurPtr, Size);
1731 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1737 return (C2 ==
'x' || C2 ==
'X');
1743 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1745 char C = getCharAndSize(CurPtr, Size);
1748 CurPtr = ConsumeChar(CurPtr, Size, Result);
1750 C = getCharAndSize(CurPtr, Size);
1754 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1757 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1758 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1762 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1766 bool IsHexFloat =
true;
1767 if (!LangOpts.C99) {
1768 if (!isHexaLiteral(BufferPtr, LangOpts))
1771 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1775 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1784 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1785 CurPtr = ConsumeChar(CurPtr, Size, Result);
1786 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1787 return LexNumericConstant(Result, CurPtr);
1792 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1793 return LexNumericConstant(Result, CurPtr);
1794 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1795 return LexNumericConstant(Result, CurPtr);
1798 const char *TokStart = BufferPtr;
1799 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
1806 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1807 bool IsStringLiteral) {
1812 char C = getCharAndSize(CurPtr, Size);
1813 bool Consumed =
false;
1816 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1818 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1827 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1828 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1839 bool IsUDSuffix =
false;
1846 const unsigned MaxStandardSuffixLength = 3;
1847 char Buffer[MaxStandardSuffixLength] = { C };
1848 unsigned Consumed = Size;
1856 const StringRef CompleteSuffix(Buffer, Chars);
1862 if (Chars == MaxStandardSuffixLength)
1866 Buffer[Chars++] = Next;
1867 Consumed += NextSize;
1874 ? diag::ext_ms_reserved_user_defined_literal
1875 : diag::ext_reserved_user_defined_literal)
1880 CurPtr = ConsumeChar(CurPtr, Size, Result);
1885 C = getCharAndSize(CurPtr, Size);
1887 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1888 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
1897 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
1899 const char *AfterQuote = CurPtr;
1901 const char *NulCharacter =
nullptr;
1904 (Kind == tok::utf8_string_literal ||
1905 Kind == tok::utf16_string_literal ||
1906 Kind == tok::utf32_string_literal))
1908 ? diag::warn_cxx98_compat_unicode_literal
1909 : diag::warn_c99_compat_unicode_literal);
1911 char C = getAndAdvanceChar(CurPtr, Result);
1916 C = getAndAdvanceChar(CurPtr, Result);
1918 if (C ==
'\n' || C ==
'\r' ||
1919 (C == 0 && CurPtr-1 == BufferEnd)) {
1921 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
1922 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1927 if (isCodeCompletionPoint(CurPtr-1)) {
1929 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
1932 FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
1937 NulCharacter = CurPtr-1;
1939 C = getAndAdvanceChar(CurPtr, Result);
1944 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1948 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1951 const char *TokStart = BufferPtr;
1952 FormTokenWithChars(Result, CurPtr, Kind);
1959 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1967 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1969 unsigned PrefixLen = 0;
1975 if (CurPtr[PrefixLen] !=
'(') {
1977 const char *PrefixEnd = &CurPtr[PrefixLen];
1978 if (PrefixLen == 16) {
1979 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1981 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1982 << StringRef(PrefixEnd, 1);
1994 if (C == 0 && CurPtr-1 == BufferEnd) {
2000 FormTokenWithChars(Result, CurPtr, tok::unknown);
2005 const char *Prefix = CurPtr;
2006 CurPtr += PrefixLen + 1;
2013 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2014 CurPtr += PrefixLen + 1;
2017 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
2019 Diag(BufferPtr, diag::err_unterminated_raw_string)
2020 << StringRef(Prefix, PrefixLen);
2021 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2028 CurPtr = LexUDSuffix(Result, CurPtr,
true);
2031 const char *TokStart = BufferPtr;
2032 FormTokenWithChars(Result, CurPtr, Kind);
2039 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
2041 const char *NulCharacter =
nullptr;
2042 const char *AfterLessPos = CurPtr;
2043 char C = getAndAdvanceChar(CurPtr, Result);
2048 C = getAndAdvanceChar(CurPtr, Result);
2050 if (C ==
'\n' || C ==
'\r' ||
2051 (C == 0 && (CurPtr - 1 == BufferEnd))) {
2054 FormTokenWithChars(Result, AfterLessPos, tok::less);
2059 if (isCodeCompletionPoint(CurPtr - 1)) {
2060 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2062 FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
2065 NulCharacter = CurPtr-1;
2067 C = getAndAdvanceChar(CurPtr, Result);
2072 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2075 const char *TokStart = BufferPtr;
2076 FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
2081 void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2082 const char *CompletionPoint,
2085 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2086 auto Slash = PartialPath.find_last_of(LangOpts.MSVCCompat ?
"/\\" :
"/");
2088 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2089 const char *StartOfFilename =
2090 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2093 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2095 while (CompletionPoint < BufferEnd) {
2096 char Next = *(CompletionPoint + 1);
2097 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2100 if (Next == (IsAngled ?
'>' :
'"'))
2111 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
2114 const char *NulCharacter =
nullptr;
2117 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2119 ? diag::warn_cxx98_compat_unicode_literal
2120 : diag::warn_c99_compat_unicode_literal);
2121 else if (Kind == tok::utf8_char_constant)
2122 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2125 char C = getAndAdvanceChar(CurPtr, Result);
2128 Diag(BufferPtr, diag::ext_empty_character);
2129 FormTokenWithChars(Result, CurPtr, tok::unknown);
2136 C = getAndAdvanceChar(CurPtr, Result);
2138 if (C ==
'\n' || C ==
'\r' ||
2139 (C == 0 && CurPtr-1 == BufferEnd)) {
2141 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2142 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2147 if (isCodeCompletionPoint(CurPtr-1)) {
2149 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2154 NulCharacter = CurPtr-1;
2156 C = getAndAdvanceChar(CurPtr, Result);
2161 CurPtr = LexUDSuffix(Result, CurPtr,
false);
2165 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2168 const char *TokStart = BufferPtr;
2169 FormTokenWithChars(Result, CurPtr, Kind);
2178 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
2179 bool &TokAtPhysicalStartOfLine) {
2183 unsigned char Char = *CurPtr;
2208 FormTokenWithChars(Result, CurPtr, tok::unknown);
2210 IsAtStartOfLine =
true;
2211 IsAtPhysicalStartOfLine =
true;
2218 char PrevChar = CurPtr[-1];
2224 TokAtPhysicalStartOfLine =
true;
2237 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2238 bool &TokAtPhysicalStartOfLine) {
2242 Diag(BufferPtr, diag::ext_line_comment);
2246 LangOpts.LineComment =
true;
2260 C !=
'\n' && C !=
'\r')
2263 const char *NextLine = CurPtr;
2266 const char *EscapePtr = CurPtr-1;
2267 bool HasSpace =
false;
2273 if (*EscapePtr ==
'\\')
2276 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2277 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2279 CurPtr = EscapePtr-2;
2285 Diag(EscapePtr, diag::backslash_newline_space);
2292 const char *OldPtr = CurPtr;
2295 C = getAndAdvanceChar(CurPtr, Result);
2300 if (C != 0 && CurPtr == OldPtr+1) {
2308 if (CurPtr != OldPtr + 1 && C !=
'/' &&
2309 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2310 for (; OldPtr != CurPtr; ++OldPtr)
2311 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2315 const char *ForwardPtr = CurPtr;
2318 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2323 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2328 if (C ==
'\r' || C ==
'\n' || CurPtr == BufferEnd + 1) {
2333 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2351 return SaveLineComment(Result, CurPtr);
2369 TokAtPhysicalStartOfLine =
true;
2378 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2381 FormTokenWithChars(Result, CurPtr, tok::comment);
2388 bool Invalid =
false;
2389 std::string Spelling = PP->
getSpelling(Result, &Invalid);
2393 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2408 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2414 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2416 if (CurPtr[0] == CurPtr[1])
2424 bool HasSpace =
false;
2431 if (*CurPtr ==
'\\') {
2432 if (CurPtr[-1] !=
'*')
return false;
2435 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2446 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2450 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2455 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2459 L->
Diag(CurPtr, diag::backslash_newline_space);
2480 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2481 bool &TokAtPhysicalStartOfLine) {
2491 unsigned char C = getCharAndSize(CurPtr, CharSize);
2493 if (C == 0 && CurPtr == BufferEnd+1) {
2495 Diag(BufferPtr, diag::err_unterminated_block_comment);
2501 FormTokenWithChars(Result, CurPtr, tok::unknown);
2517 if (CurPtr + 24 < BufferEnd &&
2522 while (C !=
'/' && ((
intptr_t)CurPtr & 0x0F) != 0)
2525 if (C ==
'/')
goto FoundSlash;
2529 while (CurPtr+16 <= BufferEnd) {
2536 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2542 __vector
unsigned char Slashes = {
2543 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2544 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/' 2546 while (CurPtr+16 <= BufferEnd &&
2547 !
vec_any_eq(*(
const vector
unsigned char*)CurPtr, Slashes))
2551 while (CurPtr[0] !=
'/' &&
2555 CurPtr+4 < BufferEnd) {
2565 while (C !=
'/' && C !=
'\0')
2570 if (CurPtr[-2] ==
'*')
2573 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2580 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2585 Diag(CurPtr-1, diag::warn_nested_block_comment);
2587 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2589 Diag(BufferPtr, diag::err_unterminated_block_comment);
2598 FormTokenWithChars(Result, CurPtr, tok::unknown);
2604 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2623 FormTokenWithChars(Result, CurPtr, tok::comment);
2632 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2650 "Must be in a preprocessing directive!");
2654 const char *CurPtr = BufferPtr;
2656 char Char = getAndAdvanceChar(CurPtr, Tmp);
2660 Result->push_back(Char);
2664 if (CurPtr-1 != BufferEnd) {
2665 if (isCodeCompletionPoint(CurPtr-1)) {
2673 Result->push_back(Char);
2681 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2682 BufferPtr = CurPtr-1;
2686 if (Tmp.
is(tok::code_completion)) {
2691 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
2703 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
2711 FormTokenWithChars(Result, CurPtr, tok::eod);
2723 BufferPtr = BufferEnd;
2724 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2739 diag::err_pp_unterminated_conditional);
2745 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
2750 if (LangOpts.CPlusPlus11) {
2754 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2755 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2757 DiagID = diag::warn_no_newline_eof;
2760 DiagID = diag::ext_no_newline_eof;
2763 Diag(BufferEnd, DiagID)
2777 unsigned Lexer::isNextPPTokenLParen() {
2778 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
2786 const char *TmpBufferPtr = BufferPtr;
2788 bool atStartOfLine = IsAtStartOfLine;
2789 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2790 bool leadingSpace = HasLeadingSpace;
2796 BufferPtr = TmpBufferPtr;
2798 HasLeadingSpace = leadingSpace;
2799 IsAtStartOfLine = atStartOfLine;
2800 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
2807 return Tok.
is(tok::l_paren);
2813 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2815 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
2816 size_t Pos = RestOfBuffer.find(Terminator);
2817 while (Pos != StringRef::npos) {
2820 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2821 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2822 Pos = RestOfBuffer.find(Terminator);
2825 return RestOfBuffer.data()+Pos;
2834 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
2836 if (CurPtr != BufferStart &&
2837 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2841 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
2842 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
2857 Diag(CurPtr, diag::err_conflict_marker);
2858 CurrentConflictMarkerState =
Kind;
2862 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2863 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
2878 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
2880 if (CurPtr != BufferStart &&
2881 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2890 for (
unsigned i = 1; i != 4; ++i)
2891 if (CurPtr[i] != CurPtr[0])
2898 CurrentConflictMarkerState)) {
2902 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
2908 CurrentConflictMarkerState =
CMK_None;
2916 const char *BufferEnd) {
2917 if (CurPtr == BufferEnd)
2920 for (; CurPtr != BufferEnd; ++CurPtr) {
2921 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
2927 bool Lexer::lexEditorPlaceholder(
Token &Result,
const char *CurPtr) {
2928 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
2934 const char *Start = CurPtr - 1;
2935 if (!LangOpts.AllowEditorPlaceholders)
2936 Diag(Start, diag::err_placeholder_in_source);
2938 FormTokenWithChars(Result, End, tok::raw_identifier);
2946 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
2955 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
2958 char Kind = getCharAndSize(StartPtr, CharSize);
2960 unsigned NumHexDigits;
2963 else if (Kind ==
'U')
2968 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2970 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
2974 const char *CurPtr = StartPtr + CharSize;
2975 const char *KindLoc = &CurPtr[-1];
2977 uint32_t CodePoint = 0;
2978 for (
unsigned i = 0; i < NumHexDigits; ++i) {
2979 char C = getCharAndSize(CurPtr, CharSize);
2981 unsigned Value = llvm::hexDigitValue(C);
2985 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2986 << StringRef(KindLoc, 1);
2988 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
2991 if (i == 4 && NumHexDigits == 8) {
2993 Diag(KindLoc, diag::note_ucn_four_not_eight)
3010 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
3013 while (StartPtr != CurPtr)
3014 (void)getAndAdvanceChar(StartPtr, *Result);
3020 if (LangOpts.AsmPreprocessor)
3034 if (CodePoint < 0xA0) {
3035 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
3041 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3042 Diag(BufferPtr, diag::err_ucn_control_character);
3044 char C =
static_cast<char>(CodePoint);
3045 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
3050 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3055 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3056 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3058 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3066 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
3067 const char *CurPtr) {
3068 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
3071 UnicodeWhitespaceChars.contains(
C)) {
3072 Diag(BufferPtr, diag::ext_unicode_whitespace)
3081 bool Lexer::LexUnicode(
Token &Result, uint32_t
C,
const char *CurPtr) {
3093 return LexIdentifier(Result, CurPtr);
3108 Diag(BufferPtr, diag::err_non_ascii)
3118 FormTokenWithChars(Result, CurPtr, tok::unknown);
3122 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
3129 bool Lexer::Lex(
Token &Result) {
3134 if (IsAtStartOfLine) {
3136 IsAtStartOfLine =
false;
3139 if (HasLeadingSpace) {
3141 HasLeadingSpace =
false;
3144 if (HasLeadingEmptyMacro) {
3146 HasLeadingEmptyMacro =
false;
3149 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3150 IsAtPhysicalStartOfLine =
false;
3153 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
3155 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3156 return returnedToken;
3164 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
3171 const char *CurPtr = BufferPtr;
3174 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
3176 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
3183 FormTokenWithChars(Result, CurPtr, tok::unknown);
3192 unsigned SizeTmp, SizeTmp2;
3195 char Char = getAndAdvanceChar(CurPtr, Result);
3201 if (CurPtr-1 == BufferEnd)
3202 return LexEndOfFile(Result, CurPtr-1);
3205 if (isCodeCompletionPoint(CurPtr-1)) {
3208 FormTokenWithChars(Result, CurPtr, tok::code_completion);
3213 Diag(CurPtr-1, diag::null_in_file);
3215 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3224 if (LangOpts.MicrosoftExt) {
3226 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3227 return LexEndOfFile(Result, CurPtr-1);
3231 Kind = tok::unknown;
3235 if (CurPtr[0] ==
'\n')
3236 Char = getAndAdvanceChar(CurPtr, Result);
3250 IsAtStartOfLine =
true;
3251 IsAtPhysicalStartOfLine =
true;
3260 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3270 SkipHorizontalWhitespace:
3272 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3281 LangOpts.LineComment &&
3282 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3283 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3285 goto SkipIgnoredUnits;
3287 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3289 goto SkipIgnoredUnits;
3291 goto SkipHorizontalWhitespace;
3299 case '0':
case '1':
case '2':
case '3':
case '4':
3300 case '5':
case '6':
case '7':
case '8':
case '9':
3303 return LexNumericConstant(Result, CurPtr);
3309 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3310 Char = getCharAndSize(CurPtr, SizeTmp);
3314 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3315 tok::utf16_string_literal);
3319 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3320 tok::utf16_char_constant);
3323 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3324 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3325 return LexRawStringLiteral(Result,
3326 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3328 tok::utf16_string_literal);
3331 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3335 return LexStringLiteral(Result,
3336 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3338 tok::utf8_string_literal);
3339 if (Char2 ==
'\'' && LangOpts.CPlusPlus17)
3340 return LexCharConstant(
3341 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3343 tok::utf8_char_constant);
3345 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3347 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3350 return LexRawStringLiteral(Result,
3351 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3354 tok::utf8_string_literal);
3361 return LexIdentifier(Result, CurPtr);
3367 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3368 Char = getCharAndSize(CurPtr, SizeTmp);
3372 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3373 tok::utf32_string_literal);
3377 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3378 tok::utf32_char_constant);
3381 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3382 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3383 return LexRawStringLiteral(Result,
3384 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3386 tok::utf32_string_literal);
3390 return LexIdentifier(Result, CurPtr);
3396 if (LangOpts.CPlusPlus11) {
3397 Char = getCharAndSize(CurPtr, SizeTmp);
3400 return LexRawStringLiteral(Result,
3401 ConsumeChar(CurPtr, SizeTmp, Result),
3402 tok::string_literal);
3406 return LexIdentifier(Result, CurPtr);
3411 Char = getCharAndSize(CurPtr, SizeTmp);
3415 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3416 tok::wide_string_literal);
3419 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3420 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3421 return LexRawStringLiteral(Result,
3422 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3424 tok::wide_string_literal);
3428 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3429 tok::wide_char_constant);
3434 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3435 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3436 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3437 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3438 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3439 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3440 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3441 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3445 return LexIdentifier(Result, CurPtr);
3448 if (LangOpts.DollarIdents) {
3450 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3453 return LexIdentifier(Result, CurPtr);
3456 Kind = tok::unknown;
3463 return LexCharConstant(Result, CurPtr, tok::char_constant);
3469 return LexStringLiteral(Result, CurPtr, tok::string_literal);
3473 Kind = tok::question;
3476 Kind = tok::l_square;
3479 Kind = tok::r_square;
3482 Kind = tok::l_paren;
3485 Kind = tok::r_paren;
3488 Kind = tok::l_brace;
3491 Kind = tok::r_brace;
3494 Char = getCharAndSize(CurPtr, SizeTmp);
3495 if (Char >=
'0' && Char <=
'9') {
3499 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3500 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3501 Kind = tok::periodstar;
3503 }
else if (Char ==
'.' &&
3504 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3505 Kind = tok::ellipsis;
3506 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3513 Char = getCharAndSize(CurPtr, SizeTmp);
3516 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3517 }
else if (Char ==
'=') {
3518 Kind = tok::ampequal;
3519 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3525 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3526 Kind = tok::starequal;
3527 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3533 Char = getCharAndSize(CurPtr, SizeTmp);
3535 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3536 Kind = tok::plusplus;
3537 }
else if (Char ==
'=') {
3538 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3539 Kind = tok::plusequal;
3545 Char = getCharAndSize(CurPtr, SizeTmp);
3547 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3548 Kind = tok::minusminus;
3549 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3550 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3551 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3553 Kind = tok::arrowstar;
3554 }
else if (Char ==
'>') {
3555 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3557 }
else if (Char ==
'=') {
3558 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3559 Kind = tok::minusequal;
3568 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3569 Kind = tok::exclaimequal;
3570 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3572 Kind = tok::exclaim;
3577 Char = getCharAndSize(CurPtr, SizeTmp);
3587 bool TreatAsComment = LangOpts.LineComment &&
3588 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3589 if (!TreatAsComment)
3591 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3593 if (TreatAsComment) {
3594 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3595 TokAtPhysicalStartOfLine))
3601 goto SkipIgnoredUnits;
3606 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3607 TokAtPhysicalStartOfLine))
3616 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3617 Kind = tok::slashequal;
3623 Char = getCharAndSize(CurPtr, SizeTmp);
3625 Kind = tok::percentequal;
3626 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3627 }
else if (LangOpts.Digraphs && Char ==
'>') {
3628 Kind = tok::r_brace;
3629 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3630 }
else if (LangOpts.Digraphs && Char ==
':') {
3631 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3632 Char = getCharAndSize(CurPtr, SizeTmp);
3633 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3634 Kind = tok::hashhash;
3635 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3637 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3638 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3640 Diag(BufferPtr, diag::ext_charize_microsoft);
3647 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3648 goto HandleDirective;
3653 Kind = tok::percent;
3657 Char = getCharAndSize(CurPtr, SizeTmp);
3659 return LexAngledStringLiteral(Result, CurPtr);
3660 }
else if (Char ==
'<') {
3661 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3663 Kind = tok::lesslessequal;
3664 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3666 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3670 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3674 }
else if (LangOpts.CUDA && After ==
'<') {
3675 Kind = tok::lesslessless;
3676 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3679 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3680 Kind = tok::lessless;
3682 }
else if (Char ==
'=') {
3683 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3687 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
3688 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3690 Kind = tok::spaceship;
3696 Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship)
3701 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3702 Kind = tok::lessequal;
3703 }
else if (LangOpts.Digraphs && Char ==
':') {
3704 if (LangOpts.CPlusPlus11 &&
3705 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3712 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3713 if (After !=
':' && After !=
'>') {
3716 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3721 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3722 Kind = tok::l_square;
3723 }
else if (LangOpts.Digraphs && Char ==
'%') {
3724 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3725 Kind = tok::l_brace;
3726 }
else if (Char ==
'#' && SizeTmp == 1 &&
3727 lexEditorPlaceholder(Result, CurPtr)) {
3734 Char = getCharAndSize(CurPtr, SizeTmp);
3736 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3737 Kind = tok::greaterequal;
3738 }
else if (Char ==
'>') {
3739 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3741 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3743 Kind = tok::greatergreaterequal;
3744 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3748 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3751 }
else if (LangOpts.CUDA && After ==
'>') {
3752 Kind = tok::greatergreatergreater;
3753 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3756 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3757 Kind = tok::greatergreater;
3760 Kind = tok::greater;
3764 Char = getCharAndSize(CurPtr, SizeTmp);
3766 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3767 Kind = tok::caretequal;
3768 }
else if (LangOpts.OpenCL && Char ==
'^') {
3769 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3770 Kind = tok::caretcaret;
3776 Char = getCharAndSize(CurPtr, SizeTmp);
3778 Kind = tok::pipeequal;
3779 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3780 }
else if (Char ==
'|') {
3782 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3784 Kind = tok::pipepipe;
3785 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3791 Char = getCharAndSize(CurPtr, SizeTmp);
3792 if (LangOpts.Digraphs && Char ==
'>') {
3793 Kind = tok::r_square;
3794 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3795 }
else if ((LangOpts.CPlusPlus ||
3796 LangOpts.DoubleSquareBracketAttributes) &&
3798 Kind = tok::coloncolon;
3799 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3808 Char = getCharAndSize(CurPtr, SizeTmp);
3811 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3814 Kind = tok::equalequal;
3815 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3824 Char = getCharAndSize(CurPtr, SizeTmp);
3826 Kind = tok::hashhash;
3827 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3828 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3831 Diag(BufferPtr, diag::ext_charize_microsoft);
3832 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3838 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3839 goto HandleDirective;
3847 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
3850 Kind = tok::unknown;
3855 if (!LangOpts.AsmPreprocessor) {
3856 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3857 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3858 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3866 return LexUnicode(Result, CodePoint, CurPtr);
3870 Kind = tok::unknown;
3875 Kind = tok::unknown;
3879 llvm::UTF32 CodePoint;
3884 llvm::ConversionResult Status =
3885 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
3886 (
const llvm::UTF8 *)BufferEnd,
3888 llvm::strictConversion);
3889 if (Status == llvm::conversionOK) {
3890 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3891 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3898 return LexUnicode(Result, CodePoint, CurPtr);
3904 Kind = tok::unknown;
3911 Diag(CurPtr, diag::err_invalid_utf8);
3913 BufferPtr = CurPtr+1;
3925 FormTokenWithChars(Result, CurPtr, Kind);
3931 FormTokenWithChars(Result, CurPtr, tok::hash);
3936 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
This is a discriminated union of FileInfo and ExpansionInfo.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
SourceLocation getSpellingLoc() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
void setBegin(SourceLocation b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Defines the SourceManager interface.
LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
void setFlag(TokenFlags Flag)
Set the specified flag.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
bool hadModuleLoaderFatalFailure() const
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
Defines the MultipleIncludeOpt interface.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
tok::TokenKind getKind() const
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
One of these records is kept for each identifier that is lexed.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
SourceLocation getBegin() const
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
void setRawIdentifierData(const char *Ptr)
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether new line pointed by Str is preceded by escape sequence.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
Defines the Diagnostic-related interfaces.
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
bool isMacroArgExpansion() const
bool HandleComment(Token &result, SourceRange Comment)
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
bool ParsingFilename
True after #include; turns <xx> into a tok::angle_string_literal token.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
Defines the clang::LangOptions interface.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
const AnnotatedLine * Line
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::Preprocessor interface.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
bool getCommentRetentionState() const
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
CharSourceRange getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns the pointer that points to the beginning of the line that contains the given offset, or null if the offset is invalid.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
The result type of a method or function.
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., $\sqrt{p.x^2 + p.y^2 + \cdots}$
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
const ExpansionInfo & getExpansion() const
bool isRecordingPreamble() const
static CharSourceRange getCharRange(SourceRange R)
SourceManager & getSourceManager() const
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character, check whether it's a homoglyph for a common non-identifier source character that is unlikely to be an intentional identifier character and warn if so.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, bool ExpansionIsTokenRange=true, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
IdentifierInfo * getIdentifierInfo() const
IdentifierTable & getIdentifierTable()
static Optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
void setIdentifierInfo(IdentifierInfo *II)
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getExpansionLocStart() const
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
unsigned getLength() const
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
void setLiteralData(const char *Ptr)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the s...
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-lik...
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo member...
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
SourceLocation getEnd() const
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
Defines the clang::TokenKind enum and support functions.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
Defines the clang::SourceLocation class and associated facilities.
DiagnosticsEngine & getDiagnostics() const
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Not within a conflict marker.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void StringifyImpl(T &Str, char Quote)
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
void startToken()
Reset all flags to cleared.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.