30 #include "llvm/ADT/None.h" 31 #include "llvm/ADT/Optional.h" 32 #include "llvm/ADT/StringExtras.h" 33 #include "llvm/ADT/StringSwitch.h" 34 #include "llvm/ADT/StringRef.h" 35 #include "llvm/Support/Compiler.h" 36 #include "llvm/Support/ConvertUTF.h" 37 #include "llvm/Support/MathExtras.h" 38 #include "llvm/Support/MemoryBuffer.h" 39 #include "llvm/Support/NativeFormatting.h" 40 #include "llvm/Support/UnicodeCharRanges.h" 50 using namespace clang;
61 return II->getObjCKeywordID() == objcKey;
68 return tok::objc_not_keyword;
// Intentionally empty out-of-line member definition.
// NOTE(review): presumably the "virtual method anchor" that pins the class's
// vtable to this translation unit (LLVM convention) - confirm against the
// class declaration, which is not visible here.
77 void Lexer::anchor() {}
79 void Lexer::InitLexer(
const char *BufStart,
const char *BufPtr,
81 BufferStart = BufStart;
85 assert(BufEnd[0] == 0 &&
86 "We assume that the input buffer has a null character at the end" 87 " to simplify lexing!");
92 if (BufferStart == BufferPtr) {
94 StringRef Buf(BufferStart, BufferEnd - BufferStart);
95 size_t BOMLength = llvm::StringSwitch<size_t>(Buf)
96 .StartsWith(
"\xEF\xBB\xBF", 3)
100 BufferPtr += BOMLength;
103 Is_PragmaLexer =
false;
104 CurrentConflictMarkerState =
CMK_None;
107 IsAtStartOfLine =
true;
108 IsAtPhysicalStartOfLine =
true;
110 HasLeadingSpace =
false;
111 HasLeadingEmptyMacro =
false;
114 ParsingPreprocessorDirective =
false;
117 ParsingFilename =
false;
123 LexingRawMode =
false;
126 ExtendedTokenMode = 0;
135 FileLoc(PP.getSourceManager().getLocForStartOfFile(FID)),
136 LangOpts(PP.getLangOpts()) {
137 InitLexer(InputFile->getBufferStart(), InputFile->getBufferStart(),
138 InputFile->getBufferEnd());
147 const char *BufStart,
const char *BufPtr,
const char *BufEnd)
148 : FileLoc(fileloc), LangOpts(langOpts) {
149 InitLexer(BufStart, BufPtr, BufEnd);
160 :
Lexer(SM.getLocForStartOfFile(FID), langOpts, FromFile->getBufferStart(),
161 FromFile->getBufferStart(), FromFile->getBufferEnd()) {}
164 assert(
PP &&
"Cannot reset token mode without a preprocessor");
165 if (LangOpts.TraditionalCPP)
194 const llvm::MemoryBuffer *InputFile = SM.
getBuffer(SpellingFID);
195 Lexer *L =
new Lexer(SpellingFID, InputFile, PP);
202 L->BufferPtr = StrData;
203 L->BufferEnd = StrData+TokLen;
204 assert(L->BufferEnd[0] == 0 &&
"Buffer is not nul terminated!");
210 ExpansionLocEnd, TokLen);
217 L->Is_PragmaLexer =
true;
222 IsAtPhysicalStartOfLine =
true;
223 IsAtStartOfLine =
true;
224 if ((BufferPtr + NumBytes) > BufferEnd)
226 BufferPtr += NumBytes;
231 typename T::size_type i = 0, e = Str.size();
233 if (Str[i] ==
'\\' || Str[i] == Quote) {
234 Str.insert(Str.begin() + i,
'\\');
237 }
else if (Str[i] ==
'\n' || Str[i] ==
'\r') {
239 if ((i < e - 1) && (Str[i + 1] ==
'\n' || Str[i + 1] ==
'\r') &&
240 Str[i] != Str[i + 1]) {
246 Str.insert(Str.begin() + i + 1,
'n');
257 char Quote = Charify ?
'\'' :
'"';
272 assert(Tok.
needsCleaning() &&
"getSpellingSlow called on simple token");
275 const char *BufEnd = BufPtr + Tok.
getLength();
279 while (BufPtr < BufEnd) {
284 if (Spelling[Length - 1] ==
'"')
292 Spelling[Length - 2] ==
'R' && Spelling[Length - 1] ==
'"') {
295 const char *RawEnd = BufEnd;
296 do --RawEnd;
while (*RawEnd !=
'"');
297 size_t RawLength = RawEnd - BufPtr + 1;
300 memcpy(Spelling + Length, BufPtr, RawLength);
308 while (BufPtr < BufEnd) {
315 "NeedsCleaning flag set on token that didn't need cleaning!");
333 bool invalidTemp =
false;
334 StringRef file = SM.
getBufferData(locInfo.first, &invalidTemp);
336 if (invalid) *invalid =
true;
340 const char *tokenBegin = file.data() + locInfo.second;
344 file.begin(), tokenBegin, file.end());
346 lexer.LexFromRawLexer(token);
348 unsigned length = token.getLength();
351 if (!token.needsCleaning())
352 return StringRef(tokenBegin, length);
355 buffer.resize(length);
356 buffer.resize(
getSpellingSlow(token, tokenBegin, options, buffer.data()));
357 return StringRef(buffer.data(), buffer.size());
367 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
369 bool CharDataInvalid =
false;
373 *Invalid = CharDataInvalid;
379 return std::string(TokStart, TokStart + Tok.
getLength());
383 Result.resize(
getSpellingSlow(Tok, TokStart, LangOpts, &*Result.begin()));
400 assert((
int)Tok.
getLength() >= 0 &&
"Token character range is bogus!");
402 const char *TokStart =
nullptr;
404 if (Tok.
is(tok::raw_identifier))
409 Buffer = II->getNameStart();
410 return II->getLength();
420 bool CharDataInvalid =
false;
423 *Invalid = CharDataInvalid;
424 if (CharDataInvalid) {
437 return getSpellingSlow(Tok, TokStart, LangOpts, const_cast<char*>(Buffer));
458 bool IgnoreWhiteSpace) {
469 bool Invalid =
false;
470 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
474 const char *StrData = Buffer.data()+LocInfo.second;
481 Buffer.begin(), StrData, Buffer.end());
483 TheLexer.LexFromRawLexer(Result);
490 const char *BufStart = Buffer.data();
491 if (Offset >= Buffer.size())
494 const char *LexStart = BufStart +
Offset;
495 for (; LexStart != BufStart; --LexStart) {
511 if (LocInfo.first.isInvalid())
514 bool Invalid =
false;
515 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
521 const char *StrData = Buffer.data() + LocInfo.second;
523 if (!LexStart || LexStart == StrData)
528 Lexer TheLexer(LexerStartLoc, LangOpts, Buffer.data(), LexStart,
535 TheLexer.LexFromRawLexer(TheTok);
537 if (TheLexer.getBufferLocation() > StrData) {
541 if (TheLexer.getBufferLocation() - TheTok.
getLength() <= StrData)
566 std::pair<FileID, unsigned> BeginFileLocInfo =
568 assert(FileLocInfo.first == BeginFileLocInfo.first &&
569 FileLocInfo.second >= BeginFileLocInfo.second);
588 const unsigned StartOffset = 1;
590 Lexer TheLexer(FileLoc, LangOpts, Buffer.begin(), Buffer.begin(),
594 bool InPreprocessorDirective =
false;
598 unsigned MaxLineOffset = 0;
600 const char *CurPtr = Buffer.begin();
601 unsigned CurLine = 0;
602 while (CurPtr != Buffer.end()) {
606 if (CurLine == MaxLines)
610 if (CurPtr != Buffer.end())
611 MaxLineOffset = CurPtr - Buffer.begin();
615 TheLexer.LexFromRawLexer(TheTok);
617 if (InPreprocessorDirective) {
630 InPreprocessorDirective =
false;
639 if (MaxLineOffset && TokOffset >= MaxLineOffset)
644 if (TheTok.
getKind() == tok::comment) {
652 Token HashTok = TheTok;
653 InPreprocessorDirective =
true;
659 TheLexer.LexFromRawLexer(TheTok);
663 = llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
664 .Case(
"include", PDK_Skipped)
665 .Case(
"__include_macros", PDK_Skipped)
666 .Case(
"define", PDK_Skipped)
667 .Case(
"undef", PDK_Skipped)
668 .Case(
"line", PDK_Skipped)
669 .Case(
"error", PDK_Skipped)
670 .Case(
"pragma", PDK_Skipped)
671 .Case(
"import", PDK_Skipped)
672 .Case(
"include_next", PDK_Skipped)
673 .Case(
"warning", PDK_Skipped)
674 .Case(
"ident", PDK_Skipped)
675 .Case(
"sccs", PDK_Skipped)
676 .Case(
"assert", PDK_Skipped)
677 .Case(
"unassert", PDK_Skipped)
678 .Case(
"if", PDK_Skipped)
679 .Case(
"ifdef", PDK_Skipped)
680 .Case(
"ifndef", PDK_Skipped)
681 .Case(
"elif", PDK_Skipped)
682 .Case(
"else", PDK_Skipped)
683 .Case(
"endif", PDK_Skipped)
684 .Default(PDK_Unknown);
709 if (ActiveCommentLoc.
isValid())
710 End = ActiveCommentLoc;
724 bool Invalid =
false;
728 if (Invalid || (CharNo == 0 && Lexer::isObviouslySimpleCharacter(*TokPtr)))
731 unsigned PhysOffset = 0;
736 while (Lexer::isObviouslySimpleCharacter(*TokPtr)) {
746 for (; CharNo; --CharNo) {
757 if (!Lexer::isObviouslySimpleCharacter(*TokPtr))
758 PhysOffset += Lexer::SkipEscapedNewLines(TokPtr)-TokPtr;
810 if (expansionLoc.isFileID()) {
813 *MacroBegin = expansionLoc;
841 *MacroEnd = expansionLoc;
915 bool Invalid =
false;
945 if (Invalid) *Invalid =
true;
951 if (beginInfo.first.isInvalid()) {
952 if (Invalid) *Invalid =
true;
958 beginInfo.second > EndOffs) {
959 if (Invalid) *Invalid =
true;
964 bool invalidTemp =
false;
965 StringRef file = SM.
getBufferData(beginInfo.first, &invalidTemp);
967 if (Invalid) *Invalid =
true;
971 if (Invalid) *Invalid =
false;
972 return file.substr(beginInfo.second, EndOffs - beginInfo.second);
978 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1018 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1019 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1024 assert(Loc.
isMacroID() &&
"Only reasonable to call this on macros");
1043 StringRef ExpansionBuffer = SM.
getBufferData(ExpansionInfo.first);
1044 return ExpansionBuffer.substr(ExpansionInfo.second, MacroTokenLength);
1053 if (Str - 1 < BufferStart)
1056 if ((Str[0] ==
'\n' && Str[-1] ==
'\r') ||
1057 (Str[0] ==
'\r' && Str[-1] ==
'\n')) {
1058 if (Str - 2 < BufferStart)
1068 return *Str ==
'\\';
1076 if (LocInfo.first.isInvalid())
1078 bool Invalid =
false;
1079 StringRef Buffer = SM.
getBufferData(LocInfo.first, &Invalid);
1085 StringRef Rest = Buffer.substr(Line - Buffer.data());
1086 size_t NumWhitespaceChars = Rest.find_first_not_of(
" \t");
1087 return NumWhitespaceChars == StringRef::npos
1089 : Rest.take_front(NumWhitespaceChars);
1104 unsigned CharNo,
unsigned TokLen) {
1105 assert(FileLoc.
isMacroID() &&
"Must be a macro expansion");
1121 return SM.createExpansionLoc(SpellingLoc, II.
getBegin(), II.
getEnd(), TokLen);
1127 unsigned TokLen)
const {
1128 assert(Loc >= BufferStart && Loc <= BufferEnd &&
1129 "Location out of range for this buffer!");
1133 unsigned CharNo = Loc-BufferStart;
1139 assert(PP &&
"This doesn't work on raw lexers");
1158 case '=':
return '#';
1159 case ')':
return ']';
1160 case '(':
return '[';
1161 case '!':
return '|';
1162 case '\'':
return '^';
1163 case '>':
return '}';
1164 case '/':
return '\\';
1165 case '<':
return '{';
1166 case '-':
return '~';
1176 if (!Res || !L)
return Res;
1180 L->
Diag(CP-2, diag::trigraph_ignored);
1185 L->
Diag(CP-2, diag::trigraph_converted) << StringRef(&Res, 1);
1192 unsigned Lexer::getEscapedNewLineSize(
const char *Ptr) {
1197 if (Ptr[Size-1] !=
'\n' && Ptr[Size-1] !=
'\r')
1201 if ((Ptr[Size] ==
'\r' || Ptr[Size] ==
'\n') &&
1202 Ptr[Size-1] != Ptr[Size])
1215 const char *Lexer::SkipEscapedNewLines(
const char *
P) {
1217 const char *AfterEscape;
1220 }
else if (*P ==
'?') {
1222 if (P[1] !=
'?' || P[2] !=
'/')
1231 unsigned NewLineSize = Lexer::getEscapedNewLineSize(AfterEscape);
1232 if (NewLineSize == 0)
return P;
1233 P = AfterEscape+NewLineSize;
1250 bool InvalidTemp =
false;
1251 StringRef File = SM.
getBufferData(LocInfo.first, &InvalidTemp);
1255 const char *TokenBegin = File.data() + LocInfo.second;
1259 TokenBegin, File.end());
1262 lexer.LexFromRawLexer(Tok);
1272 const LangOptions &LangOpts,
bool SkipTrailingWhitespaceAndNewLine) {
1274 if (!Tok || Tok->isNot(TKind))
1279 unsigned NumWhitespaceChars = 0;
1280 if (SkipTrailingWhitespaceAndNewLine) {
1282 unsigned char C = *TokenEnd;
1285 NumWhitespaceChars++;
1289 if (C ==
'\n' || C ==
'\r') {
1292 NumWhitespaceChars++;
1293 if ((C ==
'\n' || C ==
'\r') && C != PrevC)
1294 NumWhitespaceChars++;
1316 char Lexer::getCharAndSizeSlow(
const char *Ptr,
unsigned &Size,
1319 if (Ptr[0] ==
'\\') {
1328 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1334 Diag(Ptr, diag::backslash_newline_space);
1337 Size += EscapedNewLineSize;
1338 Ptr += EscapedNewLineSize;
1341 return getCharAndSizeSlow(Ptr, Size, Tok);
1349 if (Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1358 if (
C ==
'\\')
goto Slash;
1374 char Lexer::getCharAndSizeSlowNoWarn(
const char *Ptr,
unsigned &Size,
1377 if (Ptr[0] ==
'\\') {
1385 if (
unsigned EscapedNewLineSize = getEscapedNewLineSize(Ptr)) {
1387 Size += EscapedNewLineSize;
1388 Ptr += EscapedNewLineSize;
1391 return getCharAndSizeSlowNoWarn(Ptr, Size, LangOpts);
1399 if (LangOpts.Trigraphs && Ptr[0] ==
'?' && Ptr[1] ==
'?') {
1405 if (
C ==
'\\')
goto Slash;
1420 void Lexer::SetByteOffset(
unsigned Offset,
bool StartOfLine) {
1421 BufferPtr = BufferStart +
Offset;
1422 if (BufferPtr > BufferEnd)
1423 BufferPtr = BufferEnd;
1427 IsAtStartOfLine = StartOfLine;
1428 IsAtPhysicalStartOfLine = StartOfLine;
1432 if (LangOpts.AsmPreprocessor) {
1434 }
else if (LangOpts.DollarIdents &&
'$' == C) {
1436 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1437 static const llvm::sys::UnicodeCharSet C11AllowedIDChars(
1439 return C11AllowedIDChars.contains(C);
1440 }
else if (LangOpts.CPlusPlus) {
1441 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1443 return CXX03AllowedIDChars.contains(C);
1445 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1447 return C99AllowedIDChars.contains(C);
1453 if (LangOpts.AsmPreprocessor) {
1455 }
else if (LangOpts.CPlusPlus11 || LangOpts.C11) {
1456 static const llvm::sys::UnicodeCharSet C11DisallowedInitialIDChars(
1458 return !C11DisallowedInitialIDChars.contains(C);
1459 }
else if (LangOpts.CPlusPlus) {
1462 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1464 return !C99DisallowedInitialIDChars.contains(C);
1479 CannotAppearInIdentifier = 0,
1480 CannotStartIdentifier
1483 static const llvm::sys::UnicodeCharSet C99AllowedIDChars(
1485 static const llvm::sys::UnicodeCharSet C99DisallowedInitialIDChars(
1487 if (!C99AllowedIDChars.contains(C)) {
1490 << CannotAppearInIdentifier;
1491 }
else if (IsFirst && C99DisallowedInitialIDChars.contains(C)) {
1494 << CannotStartIdentifier;
1500 static const llvm::sys::UnicodeCharSet CXX03AllowedIDChars(
1502 if (!CXX03AllowedIDChars.contains(C)) {
1503 Diags.
Report(Range.
getBegin(), diag::warn_cxx98_compat_unicode_id)
1516 struct HomoglyphPair {
// Orders two pairs by comparing only their Character members; no other
// member of the pair takes part in the comparison.
1519 bool operator<(HomoglyphPair R)
const {
return Character < R.Character; }
1521 static constexpr HomoglyphPair SortedHomoglyphs[] = {
1574 std::lower_bound(std::begin(SortedHomoglyphs),
1575 std::end(SortedHomoglyphs) - 1, HomoglyphPair{
C,
'\0'});
1576 if (Homoglyph->Character == C) {
1579 llvm::raw_svector_ostream CharOS(CharBuf);
1580 llvm::write_hex(CharOS, C, llvm::HexPrintStyle::Upper, 4);
1582 if (Homoglyph->LooksLike) {
1583 const char LooksLikeStr[] = {Homoglyph->LooksLike, 0};
1585 << Range << CharBuf << LooksLikeStr;
1587 Diags.
Report(Range.
getBegin(), diag::warn_utf8_symbol_zero_width)
1588 << Range << CharBuf;
1593 bool Lexer::tryConsumeIdentifierUCN(
const char *&CurPtr,
unsigned Size,
1595 const char *UCNPtr = CurPtr + Size;
1596 uint32_t CodePoint = tryReadUCN(UCNPtr, CurPtr,
nullptr);
1606 if ((UCNPtr - CurPtr == 6 && CurPtr[1] ==
'u') ||
1607 (UCNPtr - CurPtr == 10 && CurPtr[1] ==
'U'))
1610 while (CurPtr != UCNPtr)
1611 (void)getAndAdvanceChar(CurPtr, Result);
1615 bool Lexer::tryConsumeIdentifierUTF8Char(
const char *&CurPtr) {
1616 const char *UnicodePtr = CurPtr;
1617 llvm::UTF32 CodePoint;
1618 llvm::ConversionResult Result =
1619 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&UnicodePtr,
1620 (
const llvm::UTF8 *)BufferEnd,
1622 llvm::strictConversion);
1623 if (Result != llvm::conversionOK ||
1635 CurPtr = UnicodePtr;
1639 bool Lexer::LexIdentifier(
Token &Result,
const char *CurPtr) {
1642 unsigned char C = *CurPtr++;
1653 if (
isASCII(C) && C !=
'\\' && C !=
'?' &&
1654 (C !=
'$' || !LangOpts.DollarIdents)) {
1656 const char *IdStart = BufferPtr;
1657 FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
1674 if (isCodeCompletionPoint(CurPtr)) {
1676 Result.
setKind(tok::code_completion);
1682 assert(*CurPtr == 0 &&
"Completion character must be 0");
1687 if (CurPtr < BufferEnd) {
1705 C = getCharAndSize(CurPtr, Size);
1709 if (!LangOpts.DollarIdents)
goto FinishIdentifier;
1713 Diag(CurPtr, diag::ext_dollar_in_identifier);
1714 CurPtr = ConsumeChar(CurPtr, Size, Result);
1715 C = getCharAndSize(CurPtr, Size);
1717 }
else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
1718 C = getCharAndSize(CurPtr, Size);
1720 }
else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
1721 C = getCharAndSize(CurPtr, Size);
1724 goto FinishIdentifier;
1728 CurPtr = ConsumeChar(CurPtr, Size, Result);
1730 C = getCharAndSize(CurPtr, Size);
1732 CurPtr = ConsumeChar(CurPtr, Size, Result);
1733 C = getCharAndSize(CurPtr, Size);
1740 bool Lexer::isHexaLiteral(
const char *Start,
const LangOptions &LangOpts) {
1746 return (C2 ==
'x' || C2 ==
'X');
1752 bool Lexer::LexNumericConstant(
Token &Result,
const char *CurPtr) {
1754 char C = getCharAndSize(CurPtr, Size);
1757 CurPtr = ConsumeChar(CurPtr, Size, Result);
1759 C = getCharAndSize(CurPtr, Size);
1763 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'E' || PrevCh ==
'e')) {
1766 if (!LangOpts.MicrosoftExt || !isHexaLiteral(BufferPtr, LangOpts))
1767 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1771 if ((C ==
'-' || C ==
'+') && (PrevCh ==
'P' || PrevCh ==
'p')) {
1775 bool IsHexFloat =
true;
1776 if (!LangOpts.C99) {
1777 if (!isHexaLiteral(BufferPtr, LangOpts))
1780 std::find(BufferPtr, CurPtr,
'_') != CurPtr)
1784 return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
1793 Diag(CurPtr, diag::warn_cxx11_compat_digit_separator);
1794 CurPtr = ConsumeChar(CurPtr, Size, Result);
1795 CurPtr = ConsumeChar(CurPtr, NextSize, Result);
1796 return LexNumericConstant(Result, CurPtr);
1801 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1802 return LexNumericConstant(Result, CurPtr);
1803 if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1804 return LexNumericConstant(Result, CurPtr);
1807 const char *TokStart = BufferPtr;
1808 FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
1815 const char *Lexer::LexUDSuffix(
Token &Result,
const char *CurPtr,
1816 bool IsStringLiteral) {
1821 char C = getCharAndSize(CurPtr, Size);
1822 bool Consumed =
false;
1825 if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result))
1827 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr))
1836 C ==
'_' ? diag::warn_cxx11_compat_user_defined_literal
1837 : diag::warn_cxx11_compat_reserved_user_defined_literal)
1848 bool IsUDSuffix =
false;
1855 const unsigned MaxStandardSuffixLength = 3;
1856 char Buffer[MaxStandardSuffixLength] = { C };
1857 unsigned Consumed = Size;
1865 const StringRef CompleteSuffix(Buffer, Chars);
1871 if (Chars == MaxStandardSuffixLength)
1875 Buffer[Chars++] = Next;
1876 Consumed += NextSize;
1883 ? diag::ext_ms_reserved_user_defined_literal
1884 : diag::ext_reserved_user_defined_literal)
1889 CurPtr = ConsumeChar(CurPtr, Size, Result);
1894 C = getCharAndSize(CurPtr, Size);
1896 else if (C ==
'\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {}
1897 else if (!
isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {}
1906 bool Lexer::LexStringLiteral(
Token &Result,
const char *CurPtr,
1908 const char *AfterQuote = CurPtr;
1910 const char *NulCharacter =
nullptr;
1913 (Kind == tok::utf8_string_literal ||
1914 Kind == tok::utf16_string_literal ||
1915 Kind == tok::utf32_string_literal))
1917 ? diag::warn_cxx98_compat_unicode_literal
1918 : diag::warn_c99_compat_unicode_literal);
1920 char C = getAndAdvanceChar(CurPtr, Result);
1925 C = getAndAdvanceChar(CurPtr, Result);
1927 if (C ==
'\n' || C ==
'\r' ||
1928 (C == 0 && CurPtr-1 == BufferEnd)) {
1930 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 1;
1931 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
1936 if (isCodeCompletionPoint(CurPtr-1)) {
1938 codeCompleteIncludedFile(AfterQuote, CurPtr - 1,
false);
1941 FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
1946 NulCharacter = CurPtr-1;
1948 C = getAndAdvanceChar(CurPtr, Result);
1953 CurPtr = LexUDSuffix(Result, CurPtr,
true);
1957 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
1960 const char *TokStart = BufferPtr;
1961 FormTokenWithChars(Result, CurPtr, Kind);
1968 bool Lexer::LexRawStringLiteral(
Token &Result,
const char *CurPtr,
1976 Diag(BufferPtr, diag::warn_cxx98_compat_raw_string_literal);
1978 unsigned PrefixLen = 0;
1984 if (CurPtr[PrefixLen] !=
'(') {
1986 const char *PrefixEnd = &CurPtr[PrefixLen];
1987 if (PrefixLen == 16) {
1988 Diag(PrefixEnd, diag::err_raw_delim_too_long);
1990 Diag(PrefixEnd, diag::err_invalid_char_raw_delim)
1991 << StringRef(PrefixEnd, 1);
2003 if (C == 0 && CurPtr-1 == BufferEnd) {
2009 FormTokenWithChars(Result, CurPtr, tok::unknown);
2014 const char *Prefix = CurPtr;
2015 CurPtr += PrefixLen + 1;
2022 if (strncmp(CurPtr, Prefix, PrefixLen) == 0 && CurPtr[PrefixLen] ==
'"') {
2023 CurPtr += PrefixLen + 1;
2026 }
else if (C == 0 && CurPtr-1 == BufferEnd) {
2028 Diag(BufferPtr, diag::err_unterminated_raw_string)
2029 << StringRef(Prefix, PrefixLen);
2030 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2037 CurPtr = LexUDSuffix(Result, CurPtr,
true);
2040 const char *TokStart = BufferPtr;
2041 FormTokenWithChars(Result, CurPtr, Kind);
2048 bool Lexer::LexAngledStringLiteral(
Token &Result,
const char *CurPtr) {
2050 const char *NulCharacter =
nullptr;
2051 const char *AfterLessPos = CurPtr;
2052 char C = getAndAdvanceChar(CurPtr, Result);
2057 C = getAndAdvanceChar(CurPtr, Result);
2059 if (C ==
'\n' || C ==
'\r' ||
2060 (C == 0 && (CurPtr - 1 == BufferEnd))) {
2063 FormTokenWithChars(Result, AfterLessPos, tok::less);
2068 if (isCodeCompletionPoint(CurPtr - 1)) {
2069 codeCompleteIncludedFile(AfterLessPos, CurPtr - 1,
true);
2071 FormTokenWithChars(Result, CurPtr - 1, tok::unknown);
2074 NulCharacter = CurPtr-1;
2076 C = getAndAdvanceChar(CurPtr, Result);
2081 Diag(NulCharacter, diag::null_in_char_or_string) << 1;
2084 const char *TokStart = BufferPtr;
2085 FormTokenWithChars(Result, CurPtr, tok::header_name);
2090 void Lexer::codeCompleteIncludedFile(
const char *PathStart,
2091 const char *CompletionPoint,
2094 StringRef PartialPath(PathStart, CompletionPoint - PathStart);
2095 auto Slash = PartialPath.find_last_of(LangOpts.MSVCCompat ?
"/\\" :
"/");
2097 (Slash == StringRef::npos) ?
"" : PartialPath.take_front(Slash);
2098 const char *StartOfFilename =
2099 (Slash == StringRef::npos) ? PathStart : PathStart + Slash + 1;
2102 StringRef(StartOfFilename, CompletionPoint - StartOfFilename)));
2104 while (CompletionPoint < BufferEnd) {
2105 char Next = *(CompletionPoint + 1);
2106 if (Next == 0 || Next ==
'\r' || Next ==
'\n')
2109 if (Next == (IsAngled ?
'>' :
'"'))
2120 bool Lexer::LexCharConstant(
Token &Result,
const char *CurPtr,
2123 const char *NulCharacter =
nullptr;
2126 if (Kind == tok::utf16_char_constant || Kind == tok::utf32_char_constant)
2128 ? diag::warn_cxx98_compat_unicode_literal
2129 : diag::warn_c99_compat_unicode_literal);
2130 else if (Kind == tok::utf8_char_constant)
2131 Diag(BufferPtr, diag::warn_cxx14_compat_u8_character_literal);
2134 char C = getAndAdvanceChar(CurPtr, Result);
2137 Diag(BufferPtr, diag::ext_empty_character);
2138 FormTokenWithChars(Result, CurPtr, tok::unknown);
2145 C = getAndAdvanceChar(CurPtr, Result);
2147 if (C ==
'\n' || C ==
'\r' ||
2148 (C == 0 && CurPtr-1 == BufferEnd)) {
2150 Diag(BufferPtr, diag::ext_unterminated_char_or_string) << 0;
2151 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2156 if (isCodeCompletionPoint(CurPtr-1)) {
2158 FormTokenWithChars(Result, CurPtr-1, tok::unknown);
2163 NulCharacter = CurPtr-1;
2165 C = getAndAdvanceChar(CurPtr, Result);
2170 CurPtr = LexUDSuffix(Result, CurPtr,
false);
2174 Diag(NulCharacter, diag::null_in_char_or_string) << 0;
2177 const char *TokStart = BufferPtr;
2178 FormTokenWithChars(Result, CurPtr, Kind);
2187 bool Lexer::SkipWhitespace(
Token &Result,
const char *CurPtr,
2188 bool &TokAtPhysicalStartOfLine) {
2192 unsigned char Char = *CurPtr;
2217 FormTokenWithChars(Result, CurPtr, tok::unknown);
2219 IsAtStartOfLine =
true;
2220 IsAtPhysicalStartOfLine =
true;
2227 char PrevChar = CurPtr[-1];
2233 TokAtPhysicalStartOfLine =
true;
2246 bool Lexer::SkipLineComment(
Token &Result,
const char *CurPtr,
2247 bool &TokAtPhysicalStartOfLine) {
2251 Diag(BufferPtr, diag::ext_line_comment);
2255 LangOpts.LineComment =
true;
2269 C !=
'\n' && C !=
'\r')
2272 const char *NextLine = CurPtr;
2275 const char *EscapePtr = CurPtr-1;
2276 bool HasSpace =
false;
2282 if (*EscapePtr ==
'\\')
2285 else if (EscapePtr[0] ==
'/' && EscapePtr[-1] ==
'?' &&
2286 EscapePtr[-2] ==
'?' && LangOpts.Trigraphs)
2288 CurPtr = EscapePtr-2;
2294 Diag(EscapePtr, diag::backslash_newline_space);
2301 const char *OldPtr = CurPtr;
2304 C = getAndAdvanceChar(CurPtr, Result);
2309 if (C != 0 && CurPtr == OldPtr+1) {
2317 if (CurPtr != OldPtr + 1 && C !=
'/' &&
2318 (CurPtr == BufferEnd + 1 || CurPtr[0] !=
'/')) {
2319 for (; OldPtr != CurPtr; ++OldPtr)
2320 if (OldPtr[0] ==
'\n' || OldPtr[0] ==
'\r') {
2324 const char *ForwardPtr = CurPtr;
2327 if (ForwardPtr[0] ==
'/' && ForwardPtr[1] ==
'/')
2332 Diag(OldPtr-1, diag::ext_multi_line_line_comment);
2337 if (C ==
'\r' || C ==
'\n' || CurPtr == BufferEnd + 1) {
2342 if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2360 return SaveLineComment(Result, CurPtr);
2378 TokAtPhysicalStartOfLine =
true;
2387 bool Lexer::SaveLineComment(
Token &Result,
const char *CurPtr) {
2390 FormTokenWithChars(Result, CurPtr, tok::comment);
2397 bool Invalid =
false;
2398 std::string Spelling = PP->
getSpelling(Result, &Invalid);
2402 assert(Spelling[0] ==
'/' && Spelling[1] ==
'/' &&
"Not line comment?");
2417 assert(CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r');
2423 if (CurPtr[0] ==
'\n' || CurPtr[0] ==
'\r') {
2425 if (CurPtr[0] == CurPtr[1])
2433 bool HasSpace =
false;
2440 if (*CurPtr ==
'\\') {
2441 if (CurPtr[-1] !=
'*')
return false;
2444 if (CurPtr[0] !=
'/' || CurPtr[-1] !=
'?' || CurPtr[-2] !=
'?' ||
2455 L->
Diag(CurPtr, diag::trigraph_ignored_block_comment);
2459 L->
Diag(CurPtr, diag::trigraph_ends_block_comment);
2464 L->
Diag(CurPtr, diag::escaped_newline_block_comment_end);
2468 L->
Diag(CurPtr, diag::backslash_newline_space);
2474 #include <emmintrin.h> 2489 bool Lexer::SkipBlockComment(
Token &Result,
const char *CurPtr,
2490 bool &TokAtPhysicalStartOfLine) {
2500 unsigned char C = getCharAndSize(CurPtr, CharSize);
2502 if (C == 0 && CurPtr == BufferEnd+1) {
2504 Diag(BufferPtr, diag::err_unterminated_block_comment);
2510 FormTokenWithChars(Result, CurPtr, tok::unknown);
2526 if (CurPtr + 24 < BufferEnd &&
2531 while (C !=
'/' && ((
intptr_t)CurPtr & 0x0F) != 0)
2534 if (C ==
'/')
goto FoundSlash;
2538 while (CurPtr+16 <= BufferEnd) {
2545 CurPtr += llvm::countTrailingZeros<unsigned>(cmp) + 1;
2551 __vector
unsigned char Slashes = {
2552 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/',
2553 '/',
'/',
'/',
'/',
'/',
'/',
'/',
'/' 2555 while (CurPtr + 16 <= BufferEnd &&
2556 !
vec_any_eq(*(
const __vector
unsigned char *)CurPtr, Slashes))
2560 while (CurPtr[0] !=
'/' &&
2564 CurPtr+4 < BufferEnd) {
2574 while (C !=
'/' && C !=
'\0')
2579 if (CurPtr[-2] ==
'*')
2582 if ((CurPtr[-2] ==
'\n' || CurPtr[-2] ==
'\r')) {
2589 if (CurPtr[0] ==
'*' && CurPtr[1] !=
'/') {
2594 Diag(CurPtr-1, diag::warn_nested_block_comment);
2596 }
else if (C == 0 && CurPtr == BufferEnd+1) {
2598 Diag(BufferPtr, diag::err_unterminated_block_comment);
2607 FormTokenWithChars(Result, CurPtr, tok::unknown);
2613 }
else if (C ==
'\0' && isCodeCompletionPoint(CurPtr-1)) {
2632 FormTokenWithChars(Result, CurPtr, tok::comment);
2641 SkipWhitespace(Result, CurPtr+1, TokAtPhysicalStartOfLine);
2659 "Must be in a preprocessing directive!");
2664 const char *CurPtr = BufferPtr;
2666 char Char = getAndAdvanceChar(CurPtr, Tmp);
2670 Result->push_back(Char);
2674 if (CurPtr-1 != BufferEnd) {
2675 if (isCodeCompletionPoint(CurPtr-1)) {
2683 Result->push_back(Char);
2691 assert(CurPtr[-1] == Char &&
"Trigraphs for newline?");
2692 BufferPtr = CurPtr-1;
2696 if (Tmp.
is(tok::code_completion)) {
2701 assert(Tmp.
is(tok::eod) &&
"Unexpected token!");
2713 bool Lexer::LexEndOfFile(
Token &Result,
const char *CurPtr) {
2721 FormTokenWithChars(Result, CurPtr, tok::eod);
2733 BufferPtr = BufferEnd;
2734 FormTokenWithChars(Result, BufferEnd,
tok::eof);
2749 diag::err_pp_unterminated_conditional);
2755 if (CurPtr != BufferStart && (CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')) {
2760 if (LangOpts.CPlusPlus11) {
2764 if (!Diags.
isIgnored(diag::warn_cxx98_compat_no_newline_eof, EndLoc)) {
2765 DiagID = diag::warn_cxx98_compat_no_newline_eof;
2767 DiagID = diag::warn_no_newline_eof;
2770 DiagID = diag::ext_no_newline_eof;
2773 Diag(BufferEnd, DiagID)
2787 unsigned Lexer::isNextPPTokenLParen() {
2788 assert(!
LexingRawMode &&
"How can we expand a macro from a skipping buffer?");
2796 const char *TmpBufferPtr = BufferPtr;
2798 bool atStartOfLine = IsAtStartOfLine;
2799 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
2800 bool leadingSpace = HasLeadingSpace;
2806 BufferPtr = TmpBufferPtr;
2808 HasLeadingSpace = leadingSpace;
2809 IsAtStartOfLine = atStartOfLine;
2810 IsAtPhysicalStartOfLine = atPhysicalStartOfLine;
2817 return Tok.
is(tok::l_paren);
2823 const char *Terminator = CMK ==
CMK_Perforce ?
"<<<<\n" :
">>>>>>>";
2825 auto RestOfBuffer = StringRef(CurPtr, BufferEnd - CurPtr).substr(TermLen);
2826 size_t Pos = RestOfBuffer.find(Terminator);
2827 while (Pos != StringRef::npos) {
2830 (RestOfBuffer[Pos - 1] !=
'\r' && RestOfBuffer[Pos - 1] !=
'\n')) {
2831 RestOfBuffer = RestOfBuffer.substr(Pos+TermLen);
2832 Pos = RestOfBuffer.find(Terminator);
2835 return RestOfBuffer.data()+Pos;
2844 bool Lexer::IsStartOfConflictMarker(
const char *CurPtr) {
2846 if (CurPtr != BufferStart &&
2847 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2851 if (!StringRef(CurPtr, BufferEnd - CurPtr).startswith(
"<<<<<<<") &&
2852 !StringRef(CurPtr, BufferEnd - CurPtr).startswith(
">>>> "))
2867 Diag(CurPtr, diag::err_conflict_marker);
2868 CurrentConflictMarkerState =
Kind;
2872 while (*CurPtr !=
'\r' && *CurPtr !=
'\n') {
2873 assert(CurPtr != BufferEnd &&
"Didn't find end of line");
2888 bool Lexer::HandleEndOfConflictMarker(
const char *CurPtr) {
2890 if (CurPtr != BufferStart &&
2891 CurPtr[-1] !=
'\n' && CurPtr[-1] !=
'\r')
2900 for (
unsigned i = 1; i != 4; ++i)
2901 if (CurPtr[i] != CurPtr[0])
2908 CurrentConflictMarkerState)) {
2912 while (CurPtr != BufferEnd && *CurPtr !=
'\r' && *CurPtr !=
'\n')
2918 CurrentConflictMarkerState =
CMK_None;
2926 const char *BufferEnd) {
2927 if (CurPtr == BufferEnd)
2930 for (; CurPtr != BufferEnd; ++CurPtr) {
2931 if (CurPtr[0] ==
'#' && CurPtr[1] ==
'>')
2937 bool Lexer::lexEditorPlaceholder(
Token &Result,
const char *CurPtr) {
2938 assert(CurPtr[-1] ==
'<' && CurPtr[0] ==
'#' &&
"Not a placeholder!");
2944 const char *Start = CurPtr - 1;
2945 if (!LangOpts.AllowEditorPlaceholders)
2946 Diag(Start, diag::err_placeholder_in_source);
2948 FormTokenWithChars(Result, End, tok::raw_identifier);
2956 bool Lexer::isCodeCompletionPoint(
const char *CurPtr)
const {
2965 uint32_t Lexer::tryReadUCN(
const char *&StartPtr,
const char *SlashLoc,
2968 char Kind = getCharAndSize(StartPtr, CharSize);
2970 unsigned NumHexDigits;
2973 else if (Kind ==
'U')
2978 if (!LangOpts.CPlusPlus && !LangOpts.C99) {
2980 Diag(SlashLoc, diag::warn_ucn_not_valid_in_c89);
2984 const char *CurPtr = StartPtr + CharSize;
2985 const char *KindLoc = &CurPtr[-1];
2987 uint32_t CodePoint = 0;
2988 for (
unsigned i = 0; i < NumHexDigits; ++i) {
2989 char C = getCharAndSize(CurPtr, CharSize);
2991 unsigned Value = llvm::hexDigitValue(C);
2995 Diag(BufferPtr, diag::warn_ucn_escape_no_digits)
2996 << StringRef(KindLoc, 1);
2998 Diag(BufferPtr, diag::warn_ucn_escape_incomplete);
3001 if (i == 4 && NumHexDigits == 8) {
3003 Diag(KindLoc, diag::note_ucn_four_not_eight)
3020 if (CurPtr - StartPtr == (
ptrdiff_t)NumHexDigits + 2)
3023 while (StartPtr != CurPtr)
3024 (void)getAndAdvanceChar(StartPtr, *Result);
3030 if (LangOpts.AsmPreprocessor)
3044 if (CodePoint < 0xA0) {
3045 if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
3051 if (CodePoint < 0x20 || CodePoint >= 0x7F)
3052 Diag(BufferPtr, diag::err_ucn_control_character);
3054 char C =
static_cast<char>(CodePoint);
3055 Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << StringRef(&C, 1);
3060 }
else if (CodePoint >= 0xD800 && CodePoint <= 0xDFFF) {
3065 if (LangOpts.CPlusPlus && !LangOpts.CPlusPlus11)
3066 Diag(BufferPtr, diag::warn_ucn_escape_surrogate);
3068 Diag(BufferPtr, diag::err_ucn_escape_invalid);
3076 bool Lexer::CheckUnicodeWhitespace(
Token &Result, uint32_t
C,
3077 const char *CurPtr) {
3078 static const llvm::sys::UnicodeCharSet UnicodeWhitespaceChars(
3081 UnicodeWhitespaceChars.contains(
C)) {
3082 Diag(BufferPtr, diag::ext_unicode_whitespace)
3091 bool Lexer::LexUnicode(
Token &Result, uint32_t
C,
const char *CurPtr) {
3103 return LexIdentifier(Result, CurPtr);
3118 Diag(BufferPtr, diag::err_non_ascii)
3128 FormTokenWithChars(Result, CurPtr, tok::unknown);
3132 void Lexer::PropagateLineStartLeadingSpaceInfo(
Token &Result) {
3139 bool Lexer::Lex(
Token &Result) {
3144 if (IsAtStartOfLine) {
3146 IsAtStartOfLine =
false;
3149 if (HasLeadingSpace) {
3151 HasLeadingSpace =
false;
3154 if (HasLeadingEmptyMacro) {
3156 HasLeadingEmptyMacro =
false;
3159 bool atPhysicalStartOfLine = IsAtPhysicalStartOfLine;
3160 IsAtPhysicalStartOfLine =
false;
3163 bool returnedToken = LexTokenInternal(Result, atPhysicalStartOfLine);
3165 assert((returnedToken || !isRawLex) &&
"Raw lex must succeed");
3166 return returnedToken;
3174 bool Lexer::LexTokenInternal(
Token &Result,
bool TokAtPhysicalStartOfLine) {
3181 const char *CurPtr = BufferPtr;
3184 if ((*CurPtr ==
' ') || (*CurPtr ==
'\t')) {
3186 while ((*CurPtr ==
' ') || (*CurPtr ==
'\t'))
3193 FormTokenWithChars(Result, CurPtr, tok::unknown);
3202 unsigned SizeTmp, SizeTmp2;
3205 char Char = getAndAdvanceChar(CurPtr, Result);
3211 if (CurPtr-1 == BufferEnd)
3212 return LexEndOfFile(Result, CurPtr-1);
3215 if (isCodeCompletionPoint(CurPtr-1)) {
3218 FormTokenWithChars(Result, CurPtr, tok::code_completion);
3223 Diag(CurPtr-1, diag::null_in_file);
3225 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3234 if (LangOpts.MicrosoftExt) {
3236 Diag(CurPtr-1, diag::ext_ctrl_z_eof_microsoft);
3237 return LexEndOfFile(Result, CurPtr-1);
3241 Kind = tok::unknown;
3245 if (CurPtr[0] ==
'\n')
3246 (void)getAndAdvanceChar(CurPtr, Result);
3260 IsAtStartOfLine =
true;
3261 IsAtPhysicalStartOfLine =
true;
3270 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3280 SkipHorizontalWhitespace:
3282 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3291 LangOpts.LineComment &&
3292 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP)) {
3293 if (SkipLineComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3295 goto SkipIgnoredUnits;
3297 if (SkipBlockComment(Result, CurPtr+2, TokAtPhysicalStartOfLine))
3299 goto SkipIgnoredUnits;
3301 goto SkipHorizontalWhitespace;
3309 case '0':
case '1':
case '2':
case '3':
case '4':
3310 case '5':
case '6':
case '7':
case '8':
case '9':
3313 return LexNumericConstant(Result, CurPtr);
3319 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3320 Char = getCharAndSize(CurPtr, SizeTmp);
3324 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3325 tok::utf16_string_literal);
3329 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3330 tok::utf16_char_constant);
3333 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3334 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3335 return LexRawStringLiteral(Result,
3336 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3338 tok::utf16_string_literal);
3341 char Char2 = getCharAndSize(CurPtr + SizeTmp, SizeTmp2);
3345 return LexStringLiteral(Result,
3346 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3348 tok::utf8_string_literal);
3349 if (Char2 ==
'\'' && LangOpts.CPlusPlus17)
3350 return LexCharConstant(
3351 Result, ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3353 tok::utf8_char_constant);
3355 if (Char2 ==
'R' && LangOpts.CPlusPlus11) {
3357 char Char3 = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3360 return LexRawStringLiteral(Result,
3361 ConsumeChar(ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3364 tok::utf8_string_literal);
3371 return LexIdentifier(Result, CurPtr);
3377 if (LangOpts.CPlusPlus11 || LangOpts.C11) {
3378 Char = getCharAndSize(CurPtr, SizeTmp);
3382 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3383 tok::utf32_string_literal);
3387 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3388 tok::utf32_char_constant);
3391 if (Char ==
'R' && LangOpts.CPlusPlus11 &&
3392 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3393 return LexRawStringLiteral(Result,
3394 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3396 tok::utf32_string_literal);
3400 return LexIdentifier(Result, CurPtr);
3406 if (LangOpts.CPlusPlus11) {
3407 Char = getCharAndSize(CurPtr, SizeTmp);
3410 return LexRawStringLiteral(Result,
3411 ConsumeChar(CurPtr, SizeTmp, Result),
3412 tok::string_literal);
3416 return LexIdentifier(Result, CurPtr);
3421 Char = getCharAndSize(CurPtr, SizeTmp);
3425 return LexStringLiteral(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3426 tok::wide_string_literal);
3429 if (LangOpts.CPlusPlus11 && Char ==
'R' &&
3430 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
'"')
3431 return LexRawStringLiteral(Result,
3432 ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3434 tok::wide_string_literal);
3438 return LexCharConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3439 tok::wide_char_constant);
3444 case 'A':
case 'B':
case 'C':
case 'D':
case 'E':
case 'F':
case 'G':
3445 case 'H':
case 'I':
case 'J':
case 'K':
case 'M':
case 'N':
3446 case 'O':
case 'P':
case 'Q':
case 'S':
case 'T':
3447 case 'V':
case 'W':
case 'X':
case 'Y':
case 'Z':
3448 case 'a':
case 'b':
case 'c':
case 'd':
case 'e':
case 'f':
case 'g':
3449 case 'h':
case 'i':
case 'j':
case 'k':
case 'l':
case 'm':
case 'n':
3450 case 'o':
case 'p':
case 'q':
case 'r':
case 's':
case 't':
3451 case 'v':
case 'w':
case 'x':
case 'y':
case 'z':
3455 return LexIdentifier(Result, CurPtr);
3458 if (LangOpts.DollarIdents) {
3460 Diag(CurPtr-1, diag::ext_dollar_in_identifier);
3463 return LexIdentifier(Result, CurPtr);
3466 Kind = tok::unknown;
3473 return LexCharConstant(Result, CurPtr, tok::char_constant);
3479 return LexStringLiteral(Result, CurPtr,
3481 : tok::string_literal);
3485 Kind = tok::question;
3488 Kind = tok::l_square;
3491 Kind = tok::r_square;
3494 Kind = tok::l_paren;
3497 Kind = tok::r_paren;
3500 Kind = tok::l_brace;
3503 Kind = tok::r_brace;
3506 Char = getCharAndSize(CurPtr, SizeTmp);
3507 if (Char >=
'0' && Char <=
'9') {
3511 return LexNumericConstant(Result, ConsumeChar(CurPtr, SizeTmp, Result));
3512 }
else if (LangOpts.CPlusPlus && Char ==
'*') {
3513 Kind = tok::periodstar;
3515 }
else if (Char ==
'.' &&
3516 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'.') {
3517 Kind = tok::ellipsis;
3518 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3525 Char = getCharAndSize(CurPtr, SizeTmp);
3528 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3529 }
else if (Char ==
'=') {
3530 Kind = tok::ampequal;
3531 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3537 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3538 Kind = tok::starequal;
3539 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3545 Char = getCharAndSize(CurPtr, SizeTmp);
3547 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3548 Kind = tok::plusplus;
3549 }
else if (Char ==
'=') {
3550 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3551 Kind = tok::plusequal;
3557 Char = getCharAndSize(CurPtr, SizeTmp);
3559 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3560 Kind = tok::minusminus;
3561 }
else if (Char ==
'>' && LangOpts.CPlusPlus &&
3562 getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
'*') {
3563 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3565 Kind = tok::arrowstar;
3566 }
else if (Char ==
'>') {
3567 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3569 }
else if (Char ==
'=') {
3570 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3571 Kind = tok::minusequal;
3580 if (getCharAndSize(CurPtr, SizeTmp) ==
'=') {
3581 Kind = tok::exclaimequal;
3582 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3584 Kind = tok::exclaim;
3589 Char = getCharAndSize(CurPtr, SizeTmp);
3599 bool TreatAsComment = LangOpts.LineComment &&
3600 (LangOpts.CPlusPlus || !LangOpts.TraditionalCPP);
3601 if (!TreatAsComment)
3603 TreatAsComment = getCharAndSize(CurPtr+SizeTmp, SizeTmp2) !=
'*';
3605 if (TreatAsComment) {
3606 if (SkipLineComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3607 TokAtPhysicalStartOfLine))
3613 goto SkipIgnoredUnits;
3618 if (SkipBlockComment(Result, ConsumeChar(CurPtr, SizeTmp, Result),
3619 TokAtPhysicalStartOfLine))
3628 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3629 Kind = tok::slashequal;
3635 Char = getCharAndSize(CurPtr, SizeTmp);
3637 Kind = tok::percentequal;
3638 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3639 }
else if (LangOpts.Digraphs && Char ==
'>') {
3640 Kind = tok::r_brace;
3641 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3642 }
else if (LangOpts.Digraphs && Char ==
':') {
3643 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3644 Char = getCharAndSize(CurPtr, SizeTmp);
3645 if (Char ==
'%' && getCharAndSize(CurPtr+SizeTmp, SizeTmp2) ==
':') {
3646 Kind = tok::hashhash;
3647 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3649 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3650 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3652 Diag(BufferPtr, diag::ext_charize_microsoft);
3659 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3660 goto HandleDirective;
3665 Kind = tok::percent;
3669 Char = getCharAndSize(CurPtr, SizeTmp);
3671 return LexAngledStringLiteral(Result, CurPtr);
3672 }
else if (Char ==
'<') {
3673 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3675 Kind = tok::lesslessequal;
3676 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3678 }
else if (After ==
'<' && IsStartOfConflictMarker(CurPtr-1)) {
3682 }
else if (After ==
'<' && HandleEndOfConflictMarker(CurPtr-1)) {
3686 }
else if (LangOpts.CUDA && After ==
'<') {
3687 Kind = tok::lesslessless;
3688 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3691 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3692 Kind = tok::lessless;
3694 }
else if (Char ==
'=') {
3695 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3699 Diag(BufferPtr, diag::warn_cxx17_compat_spaceship);
3700 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3702 Kind = tok::spaceship;
3708 Diag(BufferPtr, diag::warn_cxx2a_compat_spaceship)
3713 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3714 Kind = tok::lessequal;
3715 }
else if (LangOpts.Digraphs && Char ==
':') {
3716 if (LangOpts.CPlusPlus11 &&
3717 getCharAndSize(CurPtr + SizeTmp, SizeTmp2) ==
':') {
3724 char After = getCharAndSize(CurPtr + SizeTmp + SizeTmp2, SizeTmp3);
3725 if (After !=
':' && After !=
'>') {
3728 Diag(BufferPtr, diag::warn_cxx98_compat_less_colon_colon);
3733 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3734 Kind = tok::l_square;
3735 }
else if (LangOpts.Digraphs && Char ==
'%') {
3736 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3737 Kind = tok::l_brace;
3738 }
else if (Char ==
'#' && SizeTmp == 1 &&
3739 lexEditorPlaceholder(Result, CurPtr)) {
3746 Char = getCharAndSize(CurPtr, SizeTmp);
3748 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3749 Kind = tok::greaterequal;
3750 }
else if (Char ==
'>') {
3751 char After = getCharAndSize(CurPtr+SizeTmp, SizeTmp2);
3753 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3755 Kind = tok::greatergreaterequal;
3756 }
else if (After ==
'>' && IsStartOfConflictMarker(CurPtr-1)) {
3760 }
else if (After ==
'>' && HandleEndOfConflictMarker(CurPtr-1)) {
3763 }
else if (LangOpts.CUDA && After ==
'>') {
3764 Kind = tok::greatergreatergreater;
3765 CurPtr = ConsumeChar(ConsumeChar(CurPtr, SizeTmp, Result),
3768 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3769 Kind = tok::greatergreater;
3772 Kind = tok::greater;
3776 Char = getCharAndSize(CurPtr, SizeTmp);
3778 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3779 Kind = tok::caretequal;
3780 }
else if (LangOpts.OpenCL && Char ==
'^') {
3781 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3782 Kind = tok::caretcaret;
3788 Char = getCharAndSize(CurPtr, SizeTmp);
3790 Kind = tok::pipeequal;
3791 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3792 }
else if (Char ==
'|') {
3794 if (CurPtr[1] ==
'|' && HandleEndOfConflictMarker(CurPtr-1))
3796 Kind = tok::pipepipe;
3797 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3803 Char = getCharAndSize(CurPtr, SizeTmp);
3804 if (LangOpts.Digraphs && Char ==
'>') {
3805 Kind = tok::r_square;
3806 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3807 }
else if ((LangOpts.CPlusPlus ||
3808 LangOpts.DoubleSquareBracketAttributes) &&
3810 Kind = tok::coloncolon;
3811 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3820 Char = getCharAndSize(CurPtr, SizeTmp);
3823 if (CurPtr[1] ==
'=' && HandleEndOfConflictMarker(CurPtr-1))
3826 Kind = tok::equalequal;
3827 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3836 Char = getCharAndSize(CurPtr, SizeTmp);
3838 Kind = tok::hashhash;
3839 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3840 }
else if (Char ==
'@' && LangOpts.MicrosoftExt) {
3843 Diag(BufferPtr, diag::ext_charize_microsoft);
3844 CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
3850 if (TokAtPhysicalStartOfLine && !
LexingRawMode && !Is_PragmaLexer)
3851 goto HandleDirective;
3859 if (CurPtr[-1] ==
'@' && LangOpts.ObjC)
3862 Kind = tok::unknown;
3867 if (!LangOpts.AsmPreprocessor) {
3868 if (uint32_t CodePoint = tryReadUCN(CurPtr, BufferPtr, &Result)) {
3869 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3870 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3878 return LexUnicode(Result, CodePoint, CurPtr);
3882 Kind = tok::unknown;
3887 Kind = tok::unknown;
3891 llvm::UTF32 CodePoint;
3896 llvm::ConversionResult Status =
3897 llvm::convertUTF8Sequence((
const llvm::UTF8 **)&CurPtr,
3898 (
const llvm::UTF8 *)BufferEnd,
3900 llvm::strictConversion);
3901 if (Status == llvm::conversionOK) {
3902 if (CheckUnicodeWhitespace(Result, CodePoint, CurPtr)) {
3903 if (SkipWhitespace(Result, CurPtr, TokAtPhysicalStartOfLine))
3910 return LexUnicode(Result, CodePoint, CurPtr);
3916 Kind = tok::unknown;
3923 Diag(CurPtr, diag::err_invalid_utf8);
3925 BufferPtr = CurPtr+1;
3937 FormTokenWithChars(Result, CurPtr, Kind);
3943 FormTokenWithChars(Result, CurPtr, tok::hash);
3948 assert(Result.
is(
tok::eof) &&
"Preprocessor did not set tok:eof");
SourceLocation getLocForStartOfFile(FileID FID) const
Return the source location corresponding to the first byte of the specified file. ...
Describes the bounds (start, size) of the preamble and a flag required by PreprocessorOptions::Precom...
static unsigned getSpelling(const Token &Tok, const char *&Buffer, const SourceManager &SourceMgr, const LangOptions &LangOpts, bool *Invalid=nullptr)
getSpelling - This method is used to get the spelling of a token into a preallocated buffer...
void setCodeCompletionTokenRange(const SourceLocation Start, const SourceLocation End)
Set the code completion token range for detecting replacement range later on.
Lexer - This provides a simple interface that turns a text buffer into a stream of tokens...
SourceLocation getLocWithOffset(int Offset) const
Return a source location with the specified offset from this SourceLocation.
This is a discriminated union of FileInfo and ExpansionInfo.
unsigned getRawEncoding() const
When a SourceLocation itself cannot be used, this returns an (opaque) 32-bit integer encoding for it...
SourceLocation getSpellingLoc() const
void setFlagValue(TokenFlags Flag, bool Val)
Set a flag to either true or false.
static const llvm::sys::UnicodeCharRange C11AllowedIDCharRanges[]
void setBegin(SourceLocation b)
static __inline__ int __ATTRS_o_ai vec_any_eq(vector signed char __a, vector signed char __b)
bool is(tok::TokenKind K) const
is/isNot - Predicates to check if this token is a specific kind, as in "if (Tok.is(tok::l_brace)) {...
Defines the SourceManager interface.
LLVM_READNONE bool isASCII(char c)
Returns true if this is an ASCII character.
static bool isAllowedIDChar(uint32_t C, const LangOptions &LangOpts)
bool isInPrimaryFile() const
Return true if we're in the top-level file, not in a #include.
const char * getCharacterData(SourceLocation SL, bool *Invalid=nullptr) const
Return a pointer to the start of the specified location in the appropriate spelling MemoryBuffer...
Each ExpansionInfo encodes the expansion location - where the token was ultimately expanded...
DiagnosticBuilder Report(SourceLocation Loc, unsigned DiagID)
Issue the message to the client.
void setFlag(TokenFlags Flag)
Set the specified flag.
static char getCharAndSizeNoWarn(const char *Ptr, unsigned &Size, const LangOptions &LangOpts)
getCharAndSizeNoWarn - Like the getCharAndSize method, but does not ever emit a warning.
void setCodeCompletionIdentifierInfo(IdentifierInfo *Filter)
Set the code completion token for filtering purposes.
StringRef getBufferData(FileID FID, bool *Invalid=nullptr) const
Return a StringRef to the source buffer data for the specified FileID.
bool hadModuleLoaderFatalFailure() const
static bool isAtStartOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroBegin=nullptr)
Returns true if the given MacroID location points at the first token of the macro expansion...
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
bool isStringLiteral(TokenKind K)
Return true if this is a C or C++ string-literal (or C++11 user-defined-string-literal) token...
ConflictMarkerKind
ConflictMarkerKind - Kinds of conflict marker which the lexer might be recovering from...
static LLVM_ATTRIBUTE_NOINLINE SourceLocation GetMappedTokenLoc(Preprocessor &PP, SourceLocation FileLoc, unsigned CharNo, unsigned TokLen)
GetMappedTokenLoc - If lexing out of a 'mapped buffer', where we pretend the lexer buffer was all exp...
Like System, but searched after the system directories.
SourceLocation getCodeCompletionFileLoc() const
Returns the start location of the file of code-completion point.
static Lexer * Create_PragmaLexer(SourceLocation SpellingLoc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLen, Preprocessor &PP)
Create_PragmaLexer: Lexer constructor - Create a new lexer object for _Pragma expansion.
Defines the MultipleIncludeOpt interface.
bool isAnnotation() const
Return true if this is any of tok::annot_* kind tokens.
tok::TokenKind getKind() const
bool isLiteral() const
Return true if this is a "literal", like a numeric constant, string, etc.
Lexer(FileID FID, const llvm::MemoryBuffer *InputFile, Preprocessor &PP)
Lexer constructor - Create a new lexer object for the specified buffer with the specified preprocesso...
One of these records is kept for each identifier that is lexed.
static StringRef getIndentationForLine(SourceLocation Loc, const SourceManager &SM)
Returns the leading whitespace for the line that corresponds to the given location Loc.
SourceLocation getBegin() const
bool ParsingPreprocessorDirective
True when parsing #XXX; turns '\n' into a tok::eod token.
void setRawIdentifierData(const char *Ptr)
static SourceLocation getFromRawEncoding(unsigned Encoding)
Turn a raw encoding of a SourceLocation object into a real SourceLocation.
SmallVector< PPConditionalInfo, 4 > ConditionalStack
Information about the set of #if/#ifdef/#ifndef blocks we are currently in.
Token - This structure provides full information about a lexed token.
void setKind(tok::TokenKind K)
Keeps track of the various options that can be enabled, which control the dialect of C or C++ that i...
bool isPragmaLexer() const
isPragmaLexer - Returns true if this Lexer is being used to lex a pragma.
void resetExtendedTokenMode()
Sets the extended token mode back to its initial value, according to the language options and preproc...
A Perforce-style conflict marker, initiated by 4 ">"s, separated by 4 "="s, and terminated by 4 "<"s...
bool isObjCAtKeyword(tok::ObjCKeywordKind objcKey) const
Return true if we have an ObjC keyword identifier.
SourceLocation getSourceLocation() override
getSourceLocation - Return a source location for the next character in the current file...
bool isAtEndOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroEnd=nullptr) const
Returns true if the given MacroID location points at the character end of the immediate macro expansi...
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_set1_epi8(char __b)
Initializes all values in a 128-bit vector of [16 x i8] with the specified 8-bit value.
static SourceLocation getBeginningOfFileToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
static bool isNewLineEscaped(const char *BufferStart, const char *Str)
Checks whether the newline pointed to by Str is preceded by an escape sequence.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
StringRef getSpelling(SourceLocation loc, SmallVectorImpl< char > &buffer, bool *invalid=nullptr) const
Return the 'spelling' of the token at the given location; does not go up to the spelling location or ...
bool skipOver(unsigned NumBytes)
Skip over NumBytes bytes.
Forward-declares and imports various common LLVM datatypes that clang wants to use unqualified...
SourceLocation getExpansionLoc(SourceLocation Loc) const
Given a SourceLocation object Loc, return the expansion location referenced by the ID...
static bool getRawToken(SourceLocation Loc, Token &Result, const SourceManager &SM, const LangOptions &LangOpts, bool IgnoreWhiteSpace=false)
Relex the token at the specified location.
void HandleDirective(Token &Result)
Callback invoked when the lexer sees a # token at the start of a line.
Concrete class used by the front-end to report problems and issues.
Defines the Diagnostic-related interfaces.
SourceLocation getSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
const FileID FID
The SourceManager FileID corresponding to the file being lexed.
LLVM_READONLY bool isRawStringDelimBody(unsigned char c)
Return true if this is the body character of a C++ raw string delimiter.
static bool isEndOfBlockCommentWithEscapedNewLine(const char *CurPtr, Lexer *L)
isEndOfBlockCommentWithEscapedNewLine - Return true if the specified newline character (either \n or \r) ...
bool isMacroArgExpansion() const
bool HandleComment(Token &result, SourceRange Comment)
static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix)
Determine whether a suffix is a valid ud-suffix.
static CharSourceRange makeCharRange(Lexer &L, const char *Begin, const char *End)
A little helper class used to produce diagnostics.
bool ParsingFilename
True after #include; turns <xx> or "xxx" into a tok::header_name token.
const LangOptions & getLangOpts() const
getLangOpts - Return the language features currently enabled.
static const llvm::sys::UnicodeCharRange C11DisallowedInitialIDCharRanges[]
bool isInFileID(SourceLocation Loc, FileID FID, unsigned *RelativeOffset=nullptr) const
Given a specific FileID, returns true if Loc is inside that FileID chunk and sets relative offset (of...
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
static StringRef getSourceText(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts, bool *Invalid=nullptr)
Returns a string for the source that the range encompasses.
IdentifierInfo * LookUpIdentifierInfo(Token &Identifier) const
Given a tok::raw_identifier token, look up the identifier information for the token and install it in...
static bool isAtEndOfMacroExpansion(SourceLocation loc, const SourceManager &SM, const LangOptions &LangOpts, SourceLocation *MacroEnd=nullptr)
Returns true if the given MacroID location points at the last token of the macro expansion.
bool isCodeCompletionEnabled() const
Determine if we are performing code completion.
SourceLocation getImmediateSpellingLoc(SourceLocation Loc) const
Given a SourceLocation object, return the spelling location referenced by the ID. ...
Defines the clang::LangOptions interface.
bool LexingRawMode
True if in raw mode.
static SourceLocation getLocForEndOfToken(SourceLocation Loc, unsigned Offset, const SourceManager &SM, const LangOptions &LangOpts)
Computes the source location just past the end of the token at this source location.
Represents a character-granular source range.
bool isKeepWhitespaceMode() const
isKeepWhitespaceMode - Return true if the lexer should return tokens for every character in the file...
static PreambleBounds ComputePreamble(StringRef Buffer, const LangOptions &LangOpts, unsigned MaxLines=0)
Compute the preamble of the given file.
static unsigned MeasureTokenLength(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
MeasureTokenLength - Relex the token at the specified location and return its length in bytes in the ...
const FileEntry * getFileEntryForID(FileID FID) const
Returns the FileEntry record for the provided FileID.
const AnnotatedLine * Line
static SourceLocation findLocationAfterToken(SourceLocation loc, tok::TokenKind TKind, const SourceManager &SM, const LangOptions &LangOpts, bool SkipTrailingWhitespaceAndNewLine)
Checks that the given token is the first token that occurs after the given location (this excludes co...
bool hasLeadingEmptyMacro() const
Return true if this token has an empty macro before it.
SourceLocation getSourceLocation(const char *Loc, unsigned TokLen=1) const
getSourceLocation - Return a source location identifier for the specified offset in the current file...
SourceLocation getLocation() const
Return a source location identifier for the specified offset in the current file. ...
Defines the clang::Preprocessor interface.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the Objective-C keyword ID for this identifier.
static unsigned getTokenPrefixLength(SourceLocation TokStart, unsigned CharNo, const SourceManager &SM, const LangOptions &LangOpts)
Get the physical length (including trigraphs and escaped newlines) of the first Characters characters...
MultipleIncludeOpt MIOpt
A state machine that detects the #ifndef-wrapping a file idiom for the multiple-include optimization...
void setEnd(SourceLocation e)
bool getCommentRetentionState() const
Defines the clang::IdentifierInfo, clang::IdentifierTable, and clang::Selector interfaces.
CharSourceRange getImmediateExpansionRange(SourceLocation Loc) const
Return the start/end of the expansion information for an expansion location.
static const char * findBeginningOfLine(StringRef Buffer, unsigned Offset)
Returns a pointer to the beginning of the line that contains the given offset, or null if the offset is invalid.
bool HandleEndOfFile(Token &Result, bool isEndOfMacro=false)
Callback invoked when the lexer hits the end of the current file.
The result type of a method or function.
float __ovld __cnfn length(float p)
Return the length of vector p, i.e., sqrt(p.x^2 + p.y^2 + ...)
ObjCKeywordKind
Provides a namespace for Objective-C keywords which start with an '@'.
const ExpansionInfo & getExpansion() const
bool isRecordingPreamble() const
static CharSourceRange getCharRange(SourceRange R)
SourceManager & getSourceManager() const
bool isAtStartOfImmediateMacroExpansion(SourceLocation Loc, SourceLocation *MacroBegin=nullptr) const
Returns true if the given MacroID location points at the beginning of the immediate macro expansion...
__INTPTR_TYPE__ intptr_t
A signed integer type with the property that any valid pointer to void can be converted to this type...
Encodes a location in the source.
IdentifierInfo & get(StringRef Name)
Return the identifier token info for the specified named identifier.
static void maybeDiagnoseUTF8Homoglyph(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range)
After encountering UTF-8 character C and interpreting it as an identifier character, check whether it's a homoglyph for a common non-identifier source character that is unlikely to be an intentional identifier character and warn if so.
SourceLocation createExpansionLoc(SourceLocation Loc, SourceLocation ExpansionLocStart, SourceLocation ExpansionLocEnd, unsigned TokLength, bool ExpansionIsTokenRange=true, int LoadedID=0, unsigned LoadedOffset=0)
Return a new SourceLocation that encodes the fact that a token from SpellingLoc should actually be re...
IdentifierInfo * getIdentifierInfo() const
IdentifierTable & getIdentifierTable()
static Optional< Token > findNextToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Finds the token that comes right after the given location.
static void maybeDiagnoseIDCharCompat(DiagnosticsEngine &Diags, uint32_t C, CharSourceRange Range, bool IsFirst)
void setIdentifierInfo(IdentifierInfo *II)
static const llvm::sys::UnicodeCharRange C99DisallowedInitialIDCharRanges[]
bool isAtStartOfLine() const
isAtStartOfLine - Return true if this token is at the start of a line.
static SourceLocation GetBeginningOfToken(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Given a location anywhere in a source buffer, find the location that corresponds to the beginning of...
static CharSourceRange makeRangeFromFileLocs(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
TokenKind
Provides a simple uniform namespace for tokens from all C languages.
bool operator<(DeclarationName LHS, DeclarationName RHS)
Ordering on two declaration names.
tok::ObjCKeywordKind getObjCKeywordID() const
Return the ObjC keyword kind.
SourceLocation getCodeCompletionLoc() const
Returns the location of the code-completion point.
SourceLocation getExpansionLocStart() const
DiagnosticBuilder Diag(const char *Loc, unsigned DiagID) const
Diag - Forwarding function for diagnostics.
__DEVICE__ void * memcpy(void *__a, const void *__b, size_t __c)
static const llvm::sys::UnicodeCharRange C99AllowedIDCharRanges[]
static StringRef getImmediateMacroName(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
static StringRef getImmediateMacroNameForDiagnostics(SourceLocation Loc, const SourceManager &SM, const LangOptions &LangOpts)
Retrieve the name of the immediate macro expansion.
bool inKeepCommentMode() const
inKeepCommentMode - Return true if the lexer should return comments as tokens.
bool isTokenRange() const
Return true if the end of this range specifies the start of the last token.
static const llvm::sys::UnicodeCharRange UnicodeWhitespaceCharRanges[]
An opaque identifier used by SourceManager which refers to a source file (MemoryBuffer) along with it...
const llvm::MemoryBuffer * getBuffer(FileID FID, SourceLocation Loc, bool *Invalid=nullptr) const
Return the buffer for the specified FileID.
Dataflow Directional Tag Classes.
bool isValid() const
Return true if this is a valid SourceLocation object.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
static CharSourceRange makeFileCharRange(CharSourceRange Range, const SourceManager &SM, const LangOptions &LangOpts)
Accepts a range and returns a character range with file locations.
static size_t getSpellingSlow(const Token &Tok, const char *BufPtr, const LangOptions &LangOpts, char *Spelling)
Slow case of getSpelling.
static FixItHint CreateRemoval(CharSourceRange RemoveRange)
Create a code modification hint that removes the given source range.
bool isHandleIdentifierCase() const
Return true if the Preprocessor::HandleIdentifier must be called on a token of this identifier...
bool isLexingRawMode() const
Return true if this lexer is in raw mode or not.
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
void CodeCompleteNaturalLanguage()
Hook used by the lexer to invoke the "natural language" code completion point.
unsigned getLength() const
static const char * findPlaceholderEnd(const char *CurPtr, const char *BufferEnd)
void setLiteralData(const char *Ptr)
const char * getLiteralData() const
getLiteralData - For a literal token (numeric constant, string, etc), this returns a pointer to the start of it in the text buffer if known, null otherwise.
FileID getFileID(SourceLocation SpellingLoc) const
Return the FileID for a SourceLocation.
static const llvm::sys::UnicodeCharRange CXX03AllowedIDCharRanges[]
bool isMacroArgExpansion(SourceLocation Loc, SourceLocation *StartLoc=nullptr) const
Tests whether the given source location represents a macro argument's expansion into the function-like macro definition.
bool HandleIdentifier(Token &Identifier)
Callback invoked when the lexer reads an identifier and has filled in the token's IdentifierInfo member.
void CreateString(StringRef Str, Token &Tok, SourceLocation ExpansionLocStart=SourceLocation(), SourceLocation ExpansionLocEnd=SourceLocation())
Plop the specified string into a scratch buffer and set the specified token's location and length to ...
static bool isAllowedInitiallyIDChar(uint32_t C, const LangOptions &LangOpts)
SourceLocation getEnd() const
static FixItHint CreateInsertion(SourceLocation InsertionLoc, StringRef Code, bool BeforePreviousInsertions=false)
Create a code modification hint that inserts the given code string at a specific location.
PreprocessorOptions & getPreprocessorOpts() const
Retrieve the preprocessor options used to initialize this preprocessor.
Defines the clang::TokenKind enum and support functions.
const SrcMgr::SLocEntry & getSLocEntry(FileID FID, bool *Invalid=nullptr) const
static char GetTrigraphCharForLetter(char Letter)
GetTrigraphCharForLetter - Given a character that occurs after a ?? pair, return the decoded trigraph...
static bool isIdentifierBodyChar(char c, const LangOptions &LangOpts)
Returns true if the given character could appear in an identifier.
void ReadToEndOfLine(SmallVectorImpl< char > *Result=nullptr)
ReadToEndOfLine - Read the rest of the current preprocessor line as an uninterpreted string...
__PTRDIFF_TYPE__ ptrdiff_t
A signed integer type that is the result of subtracting two pointers.
Defines the clang::SourceLocation class and associated facilities.
DiagnosticsEngine & getDiagnostics() const
StringRef getRawIdentifier() const
getRawIdentifier - For a raw identifier token (i.e., an identifier lexed in raw mode), returns a reference to the text substring in the buffer if known.
Not within a conflict marker.
static char DecodeTrigraphChar(const char *CP, Lexer *L)
DecodeTrigraphChar - If the specified character is a legal trigraph when prefixed with ...
static const char * FindConflictEnd(const char *CurPtr, const char *BufferEnd, ConflictMarkerKind CMK)
Find the end of a version control conflict marker.
static void StringifyImpl(T &Str, char Quote)
static FixItHint CreateReplacement(CharSourceRange RemoveRange, StringRef Code)
Create a code modification hint that replaces the given source range with the given code string...
void SetCommentRetentionState(bool Mode)
SetCommentRetentionState - Change the comment retention mode of the lexer to the specified mode...
bool needsCleaning() const
Return true if this token has trigraphs or escaped newlines in it.
static __inline__ int __DEFAULT_FN_ATTRS _mm_movemask_epi8(__m128i __a)
Copies the values of the most significant bits from each 8-bit element in a 128-bit integer vector of...
bool isIgnored(unsigned DiagID, SourceLocation Loc) const
Determine whether the diagnostic is known to be ignored.
void CodeCompleteIncludedFile(llvm::StringRef Dir, bool IsAngled)
Hook used by the lexer to invoke the "included file" code completion point.
A normal or diff3 conflict marker, initiated by at least 7 "<"s, separated by at least 7 "="s or "|"s...
A trivial tuple used to represent a source range.
void clearFlag(TokenFlags Flag)
Unset the specified flag.
bool hasUCN() const
Returns true if this token contains a universal character name.
bool isPreprocessedOutput() const
Returns true if the preprocessor is responsible for generating output, false if it is producing token...
void SetKeepWhitespaceMode(bool Val)
SetKeepWhitespaceMode - This method lets clients enable or disable whitespace retention mode...
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
bool hasLeadingSpace() const
Return true if this token has whitespace before it.
DiagnosticBuilder Diag(SourceLocation Loc, unsigned DiagID) const
Forwarding function for diagnostics.
void setRecordedPreambleConditionalStack(ArrayRef< PPConditionalInfo > s)
This class handles loading and caching of source files into memory.
static __inline__ __m128i __DEFAULT_FN_ATTRS _mm_cmpeq_epi8(__m128i __a, __m128i __b)
Compares each of the corresponding 8-bit values of the 128-bit integer vectors for equality...
void startToken()
Reset all flags to cleared.
std::pair< FileID, unsigned > getDecomposedLoc(SourceLocation Loc) const
Decompose the specified location into a raw FileID + Offset pair.
static std::string Stringify(StringRef Str, bool Charify=false)
Stringify - Convert the specified string into a C string by i) escaping '\' and " characters and ii) ...
Engages in a tight little dance with the lexer to efficiently preprocess tokens.
bool LexEditorPlaceholders
When enabled, the preprocessor will construct editor placeholder tokens.