21 #include "llvm/ADT/StringSwitch.h" 22 #include "llvm/Support/MemoryBuffer.h" 25 using namespace clang;
39 : Out(Out), Tokens(Tokens), Input(Input), Diags(Diags),
40 InputSourceLoc(InputSourceLoc) {}
56 LLVM_NODISCARD IdInfo lexIdentifier(
const char *First,
const char *
const End);
57 LLVM_NODISCARD
bool isNextIdentifier(StringRef
Id,
const char *&First,
58 const char *
const End);
59 LLVM_NODISCARD
bool minimizeImpl(
const char *First,
const char *
const End);
60 LLVM_NODISCARD
bool lexPPLine(
const char *&First,
const char *
const End);
61 LLVM_NODISCARD
bool lexAt(
const char *&First,
const char *
const End);
62 LLVM_NODISCARD
bool lexModule(
const char *&First,
const char *
const End);
63 LLVM_NODISCARD
bool lexDefine(
const char *&First,
const char *
const End);
64 LLVM_NODISCARD
bool lexPragma(
const char *&First,
const char *
const End);
65 LLVM_NODISCARD
bool lexEndif(
const char *&First,
const char *
const End);
67 const char *&First,
const char *
const End);
69 Tokens.emplace_back(K, Out.size());
73 Out.resize(Tokens.back().Offset);
78 Minimizer &put(
char Byte) {
82 Minimizer &append(StringRef S) {
return append(S.begin(), S.end()); }
83 Minimizer &append(
const char *First,
const char *Last) {
84 Out.append(First, Last);
88 void printToNewline(
const char *&First,
const char *
const End);
89 void printAdjacentModuleNameParts(
const char *&First,
const char *
const End);
90 LLVM_NODISCARD
bool printAtImportBody(
const char *&First,
91 const char *
const End);
92 void printDirectiveBody(
const char *&First,
const char *
const End);
93 void printAdjacentMacroArgs(
const char *&First,
const char *
const End);
94 LLVM_NODISCARD
bool printMacroArgs(
const char *&First,
const char *
const End);
98 bool reportError(
const char *CurPtr,
unsigned Err);
100 StringMap<char> SplitIds;
108 bool Minimizer::reportError(
const char *CurPtr,
unsigned Err) {
111 assert(CurPtr >= Input.data() &&
"invalid buffer ptr");
112 Diags->Report(InputSourceLoc.getLocWithOffset(CurPtr - Input.data()), Err);
122 const char *Current) {
123 assert(First <= Current);
126 if (*Current !=
'"' || First == Current)
137 if (*Current ==
'u' || *Current ==
'U' || *Current ==
'L')
141 if (*Current !=
'8' || First == Current || *Current-- !=
'u')
147 assert(First[0] ==
'"');
148 assert(First[-1] ==
'R');
150 const char *
Last = ++First;
151 while (Last != End && *Last !=
'(')
158 StringRef Terminator(First, Last - First);
162 while (First != End && *First !=
')')
170 while (Last != End &&
size_t(Last - First) < Terminator.size() &&
171 Terminator[Last - First] == *Last)
179 if (
size_t(Last - First) < Terminator.size())
189 static unsigned isEOL(
const char *First,
const char *
const End) {
199 assert(*First ==
'\'' || *First ==
'"' || *First ==
'<');
200 const char Terminator = *First ==
'<' ?
'>' : *First;
201 for (++First; First != End && *First != Terminator; ++First) {
215 const char *FirstAfterBackslashPastSpace = First;
217 if (
unsigned NLSize =
isEOL(FirstAfterBackslashPastSpace, End)) {
220 First = FirstAfterBackslashPastSpace + NLSize - 1;
232 unsigned Len =
isEOL(First, End);
233 assert(Len &&
"expected newline");
239 return *(First - (int)EOLLen - 1) ==
'\\';
247 unsigned Len =
isEOL(First, End);
254 Len =
isEOL(First, End);
257 if (First[-1] !=
'\\')
266 assert(First <= Last);
275 if (Last == LastNonSpace)
278 return LastNonSpace + 1;
282 assert(First[0] ==
'/' && First[1] ==
'/');
288 assert(First[0] ==
'/' && First[1] ==
'*');
289 if (End - First < 4) {
293 for (First += 3; First !=
End; ++First)
294 if (First[-1] ==
'*' && First[0] ==
'/') {
303 const char *
const Cur,
304 const char *
const End) {
305 assert(*Cur ==
'\'' &&
"expected quotation character");
313 char Prev = *(Cur - 1);
314 if (Prev ==
'L' || Prev ==
'U' || Prev ==
'u')
316 if (Prev ==
'8' && (Cur - 1 != Start) && *(Cur - 2) ==
'u')
326 assert(First <= End);
334 const char *Start = First;
347 if (*First !=
'/' || End - First < 2) {
352 if (First[1] ==
'/') {
358 if (First[1] !=
'*') {
377 const char *
const End) {
378 if (llvm::StringSwitch<bool>(Name)
379 .Case(
"warning",
true)
388 void Minimizer::printToNewline(
const char *&
First,
const char *
const End) {
390 const char *
Last = First;
393 if (*Last ==
'"' || *Last ==
'\'' ||
401 if (*Last !=
'/' || End - Last < 2) {
406 if (Last[1] !=
'/' && Last[1] !=
'*') {
415 if (Last[1] ==
'/') {
428 if (Last == End || LastBeforeTrailingSpace == First ||
429 LastBeforeTrailingSpace[-1] !=
'\\') {
430 append(First, LastBeforeTrailingSpace);
440 First, LastBeforeTrailingSpace - 1));
450 assert(First <= End);
466 if (First[1] ==
'/') {
480 void Minimizer::printAdjacentModuleNameParts(
const char *&First,
481 const char *
const End) {
483 const char *
Last = First;
491 bool Minimizer::printAtImportBody(
const char *&First,
const char *
const End) {
504 put(*First++).put(
'\n');
512 printAdjacentModuleNameParts(First, End);
516 void Minimizer::printDirectiveBody(
const char *&First,
const char *
const End) {
518 printToNewline(First, End);
519 while (Out.back() ==
' ')
525 const char *
const End) {
527 const char *
Last = First + 1;
533 LLVM_NODISCARD
static const char *
545 Minimizer::IdInfo Minimizer::lexIdentifier(
const char *First,
546 const char *
const End) {
549 if (LLVM_LIKELY(!Next))
550 return IdInfo{Last, StringRef(First, Last - First)};
556 Id.append(Next, Last);
561 SplitIds.try_emplace(StringRef(Id.begin(), Id.size()), 0).first->first()};
564 void Minimizer::printAdjacentMacroArgs(
const char *&First,
565 const char *
const End) {
567 const char *
Last = First;
570 while (Last != End &&
576 bool Minimizer::printMacroArgs(
const char *&First,
const char *
const End) {
577 assert(*First ==
'(');
593 printAdjacentMacroArgs(First, End);
601 bool Minimizer::isNextIdentifier(StringRef
Id,
const char *&First,
602 const char *
const End) {
607 IdInfo FoundId = lexIdentifier(First, End);
608 First = FoundId.Last;
609 return FoundId.Name ==
Id;
612 bool Minimizer::lexAt(
const char *&First,
const char *
const End) {
614 const char *ImportLoc = First++;
615 if (!isNextIdentifier(
"import", First, End)) {
621 if (printAtImportBody(First, End))
623 ImportLoc, diag::err_dep_source_minimizer_missing_sema_after_at_import);
629 ImportLoc, diag::err_dep_source_minimizer_unexpected_tokens_at_import);
634 bool Minimizer::lexModule(
const char *&First,
const char *
const End) {
635 IdInfo Id = lexIdentifier(First, End);
638 if (Id.Name ==
"export") {
645 Id = lexIdentifier(First, End);
649 if (Id.Name !=
"module" && Id.Name !=
"import") {
676 if (Id.Name ==
"module")
682 printToNewline(First, End);
687 bool Minimizer::lexDefine(
const char *&First,
const char *
const End) {
693 return reportError(First, diag::err_pp_macro_not_identifier);
695 IdInfo Id = lexIdentifier(First, End);
696 const char *
Last = Id.Last;
701 size_t Size = Out.size();
702 if (printMacroArgs(Last, End)) {
706 append(
"(/* invalid */\n");
716 printDirectiveBody(Last, End);
721 bool Minimizer::lexPragma(
const char *&First,
const char *
const End) {
727 IdInfo FoundId = lexIdentifier(First, End);
728 First = FoundId.Last;
729 if (FoundId.Name ==
"once") {
733 append(
"#pragma once\n");
737 if (FoundId.Name !=
"clang") {
743 if (!isNextIdentifier(
"module", First, End)) {
749 if (!isNextIdentifier(
"import", First, End)) {
756 append(
"#pragma clang module import ");
757 printDirectiveBody(First, End);
761 bool Minimizer::lexEndif(
const char *&First,
const char *
const End) {
778 return lexDefault(
pp_endif,
"endif", First, End);
782 const char *&First,
const char *
const End) {
784 put(
'#').append(Directive).put(
' ');
785 printDirectiveBody(First, End);
801 bool Minimizer::lexPPLine(
const char *&First,
const char *
const End) {
802 assert(First != End);
805 assert(First <= End);
811 assert(First <= End);
817 return lexAt(First, End);
819 if (*First ==
'i' || *First ==
'e' || *First ==
'm')
820 return lexModule(First, End);
827 return reportError(First, diag::err_pp_expected_eol);
835 IdInfo Id = lexIdentifier(First, End);
837 auto Kind = llvm::StringSwitch<TokenKind>(Id.Name)
858 return lexEndif(First, End);
861 return lexDefine(First, End);
864 return lexPragma(First, End);
867 return lexDefault(Kind, Id.Name, First, End);
871 if ((End - First) >= 3 && First[0] ==
'\xef' && First[1] ==
'\xbb' &&
876 bool Minimizer::minimizeImpl(
const char *First,
const char *
const End) {
879 if (lexPPLine(First, End))
884 bool Minimizer::minimize() {
885 bool Error = minimizeImpl(Input.begin(), Input.end());
889 if (!Out.empty() && Out.back() !=
'\n')
912 for (
const Token &T : Input) {
917 Offsets.push_back({T.Offset, Directive::If});
924 int PreviousOffset = Offsets.back().Offset;
925 Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
926 Offsets.push_back({T.Offset, Directive::Else});
933 int PreviousOffset = Offsets.back().Offset;
934 Range.push_back({PreviousOffset, T.Offset - PreviousOffset});
936 Directive::DirectiveKind Kind = Offsets.pop_back_val().Kind;
937 if (Kind == Directive::If)
939 }
while (!Offsets.empty());
955 return Minimizer(Output, Tokens, Input, Diags, InputSourceLoc).minimize();
bool minimizeSourceToDependencyDirectives(llvm::StringRef Input, llvm::SmallVectorImpl< char > &Output, llvm::SmallVectorImpl< minimize_source_to_dependency_directives::Token > &Tokens, DiagnosticsEngine *Diags=nullptr, SourceLocation InputSourceLoc=SourceLocation())
Minimize the input down to the preprocessor directives that might have an effect on the dependencies ...
static bool isQuoteCppDigitSeparator(const char *const Start, const char *const Cur, const char *const End)
Specialize PointerLikeTypeTraits to allow LazyGenerationalUpdatePtr to be placed into a PointerUnion...
LLVM_READONLY bool isHorizontalWhitespace(unsigned char c)
Returns true if this character is horizontal ASCII whitespace: ' ', '\t', '\f', '\v'.
Represents a simplified token that's lexed as part of the source minimization.
LLVM_READONLY bool isWhitespace(unsigned char c)
Return true if this character is horizontal or vertical ASCII whitespace: ' ', '\t', '\f', '\v', '\n', '\r'.
static void skipUTF8ByteOrderMark(const char *&First, const char *const End)
static void skipRawString(const char *&First, const char *const End)
Concrete class used by the front-end to report problems and issues.
static const char * findFirstTrailingSpace(const char *First, const char *Last)
Defines the Diagnostic-related interfaces.
static unsigned isEOL(const char *First, const char *const End)
static unsigned skipNewline(const char *&First, const char *End)
static const char * findLastNonSpace(const char *First, const char *Last)
static LLVM_NODISCARD const char * getIdentifierContinuation(const char *First, const char *const End)
static LLVM_NODISCARD bool isRawStringLiteral(const char *First, const char *Current)
LLVM_READONLY bool isIdentifierHead(unsigned char c, bool AllowDollar=false)
Returns true if this is a valid first character of a C identifier, which is [a-zA-Z_].
TokenKind
Represents the kind of preprocessor directive or a module declaration that is tracked by the source m...
static bool isStartOfRelevantLine(char First)
static void skipWhitespace(const char *&First, const char *const End)
static void skipLineComment(const char *&First, const char *const End)
static bool wasLineContinuation(const char *First, unsigned EOLLen)
Encodes a location in the source.
static void skipBlockComment(const char *&First, const char *const End)
static void skipDirective(StringRef Name, const char *&First, const char *const End)
Dataflow Directional Tag Classes.
LLVM_READONLY bool isVerticalWhitespace(unsigned char c)
Returns true if this character is vertical ASCII whitespace: '\n', '\r'.
LLVM_READONLY bool isIdentifierBody(unsigned char c, bool AllowDollar=false)
Returns true if this is a body character of a C identifier, which is [a-zA-Z0-9_].
static LLVM_NODISCARD const char * lexRawIdentifier(const char *First, const char *const End)
static void skipOverSpaces(const char *&First, const char *const End)
static void skipToNewlineRaw(const char *&First, const char *const End)
static void skipLine(const char *&First, const char *const End)
This is the interface for minimizing header and source files to the minimum necessary preprocessor di...
Directive - Abstract class representing a parsed verify directive.
static void skipString(const char *&First, const char *const End)
LLVM_READONLY bool isPreprocessingNumberBody(unsigned char c)
Return true if this is the body character of a C preprocessing number, which is [a-zA-Z0-9_.].
bool computeSkippedRanges(ArrayRef< Token > Input, llvm::SmallVectorImpl< SkippedRange > &Range)
Computes the potential source ranges that can be skipped by the preprocessor when skipping a directiv...