clang  8.0.0
ScanfFormatString.cpp
Go to the documentation of this file.
1 //= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Handling of format strings in scanf and friends. The structure of format
11 // strings for fscanf() is described in C99 7.19.6.2.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "clang/AST/FormatString.h"
16 #include "FormatStringParsing.h"
17 #include "clang/Basic/TargetInfo.h"
18 
27 using namespace clang;
28 
31 
// ParseScanList: walks the body of a "%[...]" scan list and records where it
// ends.
//
// Beg - in/out cursor. The caller in this file invokes this with the cursor
//       already stepped past the '[' conversion character.
// E   - end of the format string.
//
// Returns true after reporting an unterminated scan list through
// H.HandleIncompleteScanList, and false once the closing ']' has been found
// and stored with CS.setEndScanList.
34  const char *&Beg, const char *E) {
35  const char *I = Beg;
 // One position before the cursor; passed as the start of the range in every
 // HandleIncompleteScanList call below.
36  const char *start = I - 1;
 // NOTE(review): UpdateOnReturn is declared elsewhere; presumably it copies I
 // back into Beg when this function exits - confirm.
37  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
38 
39  // No more characters?
40  if (I == E) {
41  H.HandleIncompleteScanList(start, I);
42  return true;
43  }
44 
45  // Special case: ']' is the first character.
 // It is stepped over here, so the search loop at the bottom does not take it
 // for the terminator.
46  if (*I == ']') {
47  if (++I == E) {
48  H.HandleIncompleteScanList(start, I - 1);
49  return true;
50  }
51  }
52 
53  // Special case: "^]" are the first characters.
 // Both characters are skipped for the same reason as above. The I + 1 != E
 // test keeps the I[1] read inside the string.
54  if (I + 1 != E && I[0] == '^' && I[1] == ']') {
55  I += 2;
56  if (I == E) {
57  H.HandleIncompleteScanList(start, I - 1);
58  return true;
59  }
60  }
61 
62  // Look for a ']' character which denotes the end of the scan list.
 // Running into E first means the list was never closed.
63  while (*I != ']') {
64  if (++I == E) {
65  H.HandleIncompleteScanList(start, I - 1);
66  return true;
67  }
68  }
69 
 // I now points at the closing ']'.
70  CS.setEndScanList(I);
71  return false;
72 }
73 
// ParseScanfSpecifier: finds and parses the next scanf conversion
// specification, starting the search at Beg and never reading past E.
//
// After the '%' the pieces are consumed in this order: argument position
// (ParseArgPosition), the '*' assignment-suppression flag, the field width,
// the length modifier (ParseLengthModifier), the conversion character and,
// for '[', the scan list (ParseScanList).
//
// Return values visible in this body:
//   - `false` when no '%' was found before E;
//   - `true` on every path that reported a problem through H;
//   - the negated result of H.HandleInvalidScanfConversionSpecifier for an
//     unrecognized conversion character;
//   - ScanfSpecifierResult(Start, FS) on success.
// NOTE(review): the bool returns rely on a conversion to the result type that
// is declared elsewhere; the caller tells the outcomes apart with
// shouldStop() / hasValue() - confirm the exact mapping.
74 // FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
75 // We can possibly refactor.
77  const char *&Beg,
78  const char *E,
79  unsigned &argIndex,
80  const LangOptions &LO,
81  const TargetInfo &Target) {
82  using namespace clang::analyze_format_string;
83  using namespace clang::analyze_scanf;
84  const char *I = Beg;
85  const char *Start = nullptr;
 // NOTE(review): UpdateOnReturn is declared elsewhere; presumably it copies I
 // back into Beg when this function exits - confirm.
86  UpdateOnReturn <const char*> UpdateBeg(Beg, I);
87 
88  // Look for a '%' character that indicates the start of a format specifier.
89  for ( ; I != E ; ++I) {
90  char c = *I;
91  if (c == '\0') {
92  // Detect spurious null characters, which are likely errors.
93  H.HandleNullChar(I);
94  return true;
95  }
96  if (c == '%') {
97  Start = I++; // Record the start of the format specifier.
98  break;
99  }
100  }
101 
102  // No format specifier found?
103  if (!Start)
104  return false;
105 
106  if (I == E) {
107  // No more characters left?
108  H.HandleIncompleteSpecifier(Start, E - Start);
109  return true;
110  }
111 
112  ScanfSpecifier FS;
113  if (ParseArgPosition(H, FS, Start, I, E))
114  return true;
115 
116  if (I == E) {
117  // No more characters left?
118  H.HandleIncompleteSpecifier(Start, E - Start);
119  return true;
120  }
121 
122  // Look for '*' flag if it is present.
123  if (*I == '*') {
124  FS.setSuppressAssignment(I);
125  if (++I == E) {
126  H.HandleIncompleteSpecifier(Start, E - Start);
127  return true;
128  }
129  }
130 
131  // Look for the field width (if any). Unlike printf, this is either
132  // a fixed integer or isn't present.
 // NOTE(review): the line(s) that open this block and define `Amt` are not
 // visible at this point; the assert below shows the amount is expected to be
 // a constant.
135  assert(Amt.getHowSpecified() == OptionalAmount::Constant);
136  FS.setFieldWidth(Amt);
137 
138  if (I == E) {
139  // No more characters left?
140  H.HandleIncompleteSpecifier(Start, E - Start);
141  return true;
142  }
143  }
144 
145  // Look for the length modifier.
 // Only a modifier that was actually consumed can leave the cursor at E here;
 // every earlier step has already checked I against E.
146  if (ParseLengthModifier(FS, I, E, LO, /*scanf=*/true) && I == E) {
147  // No more characters left?
148  H.HandleIncompleteSpecifier(Start, E - Start);
149  return true;
150  }
151 
152  // Detect spurious null characters, which are likely errors.
153  if (*I == '\0') {
154  H.HandleNullChar(I);
155  return true;
156  }
157 
158  // Finally, look for the conversion specifier.
 // The cursor moves past the conversion character; k keeps its
 // InvalidSpecifier initial value unless a case below replaces it.
159  const char *conversionPosition = I++;
160  ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
161  switch (*conversionPosition) {
162  default:
163  break;
164  case '%': k = ConversionSpecifier::PercentArg; break;
165  case 'A': k = ConversionSpecifier::AArg; break;
166  case 'E': k = ConversionSpecifier::EArg; break;
167  case 'F': k = ConversionSpecifier::FArg; break;
168  case 'G': k = ConversionSpecifier::GArg; break;
169  case 'X': k = ConversionSpecifier::XArg; break;
170  case 'a': k = ConversionSpecifier::aArg; break;
171  case 'd': k = ConversionSpecifier::dArg; break;
172  case 'e': k = ConversionSpecifier::eArg; break;
173  case 'f': k = ConversionSpecifier::fArg; break;
174  case 'g': k = ConversionSpecifier::gArg; break;
175  case 'i': k = ConversionSpecifier::iArg; break;
176  case 'n': k = ConversionSpecifier::nArg; break;
177  case 'c': k = ConversionSpecifier::cArg; break;
178  case 'C': k = ConversionSpecifier::CArg; break;
179  case 'S': k = ConversionSpecifier::SArg; break;
180  case '[': k = ConversionSpecifier::ScanListArg; break;
181  case 'u': k = ConversionSpecifier::uArg; break;
182  case 'x': k = ConversionSpecifier::xArg; break;
183  case 'o': k = ConversionSpecifier::oArg; break;
184  case 's': k = ConversionSpecifier::sArg; break;
185  case 'p': k = ConversionSpecifier::pArg; break;
186  // Apple extensions
187  // Apple-specific
 // NOTE(review): the statement guarded by each isOSDarwin() test below is
 // not visible at this point; presumably it assigns the matching kind to k -
 // confirm.
188  case 'D':
189  if (Target.getTriple().isOSDarwin())
191  break;
192  case 'O':
193  if (Target.getTriple().isOSDarwin())
195  break;
196  case 'U':
197  if (Target.getTriple().isOSDarwin())
199  break;
200  }
201  ScanfConversionSpecifier CS(conversionPosition, k);
 // A scan list carries its own body, which has to be consumed as well.
202  if (k == ScanfConversionSpecifier::ScanListArg) {
203  if (ParseScanList(H, CS, I, E))
204  return true;
205  }
206  FS.setConversionSpecifier(CS);
 // NOTE(review): the first part of this condition is not visible at this
 // point. The visible part assigns the next sequential argument index only
 // when the specifier does not use a positional argument.
208  && !FS.usesPositionalArg())
209  FS.setArgIndex(argIndex++);
210 
211  // FIXME: '%' and '*' doesn't make sense. Issue a warning.
212  // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213 
214  if (k == ScanfConversionSpecifier::InvalidSpecifier) {
 // Len is measured from Beg and may be changed by ParseUTF8InvalidSpecifier
 // (it is passed as the third argument); when that helper returns true the
 // end recorded in CS is moved to Beg + Len.
215  unsigned Len = I - Beg;
216  if (ParseUTF8InvalidSpecifier(Beg, E, Len)) {
217  CS.setEndScanList(Beg + Len);
218  FS.setConversionSpecifier(CS);
219  }
220  // Assume the conversion takes one argument.
221  return !H.HandleInvalidScanfConversionSpecifier(FS, Beg, Len);
222  }
223  return ScanfSpecifierResult(Start, FS);
224 }
225 
// Returns the argument type this scanf specifier expects.
//
// The result is ArgType::Invalid() when the conversion specifier does not
// consume a data argument, or when an inner switch on LM.getKind() reaches an
// Invalid() return. Conversion kinds that the outer switch does not handle
// end at the default-constructed ArgType() on the last line. Most of the
// visible returns wrap a type in ArgType::PtrTo.
//
// NOTE(review): most `case` labels of the outer and inner switches are not
// visible in this block, so which conversion kind or length modifier selects
// each return cannot be read off here; the section comments ("Signed int.",
// "Unsigned int.", ...) are the only grouping shown.
226 ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227  const ScanfConversionSpecifier &CS = getConversionSpecifier();
228 
 // Specifiers that store nothing have no argument type to report.
229  if (!CS.consumesDataArgument())
230  return ArgType::Invalid();
231 
232  switch(CS.getKind()) {
233  // Signed int.
237  switch (LM.getKind()) {
239  return ArgType::PtrTo(Ctx.IntTy);
243  return ArgType::PtrTo(Ctx.ShortTy);
245  return ArgType::PtrTo(Ctx.LongTy);
248  return ArgType::PtrTo(Ctx.LongLongTy);
250  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
252  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
254  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
256  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
258  // GNU extension.
259  return ArgType::PtrTo(Ctx.LongLongTy);
265  return ArgType::Invalid();
266  }
267  llvm_unreachable("Unsupported LenghtModifier Type");
268 
269  // Unsigned int.
276  switch (LM.getKind()) {
278  return ArgType::PtrTo(Ctx.UnsignedIntTy);
280  return ArgType::PtrTo(Ctx.UnsignedCharTy);
282  return ArgType::PtrTo(Ctx.UnsignedShortTy);
284  return ArgType::PtrTo(Ctx.UnsignedLongTy);
289  return ArgType::PtrTo(ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
291  return ArgType::PtrTo(ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
293  return ArgType::PtrTo(ArgType(Ctx.getSizeType(), "size_t"));
295  return ArgType::PtrTo(
296  ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t"));
298  // GNU extension.
305  return ArgType::Invalid();
306  }
307  llvm_unreachable("Unsupported LenghtModifier Type");
308 
309  // Float.
 // Three floating-point pointee types are visible (float, double, long
 // double); any other length modifier is invalid.
318  switch (LM.getKind()) {
320  return ArgType::PtrTo(Ctx.FloatTy);
322  return ArgType::PtrTo(Ctx.DoubleTy);
324  return ArgType::PtrTo(Ctx.LongDoubleTy);
325  default:
326  return ArgType::Invalid();
327  }
328 
329  // Char, string and scanlist.
333  switch (LM.getKind()) {
338  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
 // NOTE(review): the statement guarded by this isOSMSVCRT() check is not
 // visible at this point; when the check fails, control falls through to the
 // Invalid() default.
343  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
345  LLVM_FALLTHROUGH;
346  default:
347  return ArgType::Invalid();
348  }
351  // FIXME: Mac OS X specific?
352  switch (LM.getKind()) {
355  return ArgType::PtrTo(ArgType(Ctx.getWideCharType(), "wchar_t"));
358  return ArgType::PtrTo(ArgType(ArgType::WCStrTy, "wchar_t *"));
 // NOTE(review): same pattern as above - the guarded statement is not
 // visible at this point.
360  if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
362  LLVM_FALLTHROUGH;
363  default:
364  return ArgType::Invalid();
365  }
366 
367  // Pointer.
370 
371  // Write-back.
373  switch (LM.getKind()) {
375  return ArgType::PtrTo(Ctx.IntTy);
377  return ArgType::PtrTo(Ctx.SignedCharTy);
379  return ArgType::PtrTo(Ctx.ShortTy);
381  return ArgType::PtrTo(Ctx.LongTy);
384  return ArgType::PtrTo(Ctx.LongLongTy);
386  return ArgType::PtrTo(ArgType(Ctx.LongLongTy, "__int64"));
388  return ArgType::PtrTo(ArgType(Ctx.getIntMaxType(), "intmax_t"));
390  return ArgType::PtrTo(ArgType(Ctx.getSignedSizeType(), "ssize_t"));
392  return ArgType::PtrTo(ArgType(Ctx.getPointerDiffType(), "ptrdiff_t"));
394  return ArgType(); // FIXME: Is this a known extension?
400  return ArgType::Invalid();
401  }
402 
403  default:
404  break;
405  }
406 
 // Reached only through the outer `default: break`.
407  return ArgType();
408 }
409 
// Rewrites this specifier (CS, LM and, for character arrays, the field width)
// so that it fits an argument of type QT.
//
// QT      - type of the argument; must be a pointer type for a fix to be
//           attempted.
// RawQT   - passed to Ctx.getAsConstantArrayType to find a constant array
//           length for the character case.
// LangOpt - the typedef-based length modifiers are only considered when C99
//           or CPlusPlus11 is set.
// Ctx     - AST context used for type and target queries.
//
// Returns false, without a usable fix, for %n, for a non-pointer QT, for a
// pointee that is an incomplete enum or not a builtin type, and for builtin
// kinds not listed in the switch below. Returns true in all other cases.
410 bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
411  const LangOptions &LangOpt,
412  ASTContext &Ctx) {
413 
414  // %n is different from other conversion specifiers; don't try to fix it.
415  if (CS.getKind() == ConversionSpecifier::nArg)
416  return false;
417 
418  if (!QT->isPointerType())
419  return false;
420 
421  QualType PT = QT->getPointeeType();
422 
423  // If it's an enum, get its underlying type.
424  if (const EnumType *ETy = PT->getAs<EnumType>()) {
425  // Don't try to fix incomplete enums.
426  if (!ETy->getDecl()->isComplete())
427  return false;
428  PT = ETy->getDecl()->getIntegerType();
429  }
430 
431  const BuiltinType *BT = PT->getAs<BuiltinType>();
432  if (!BT)
433  return false;
434 
435  // Pointer to a character.
 // The conversion is forced to sArg; the length modifier is AsWideChar for
 // wide characters and None otherwise.
436  if (PT->isAnyCharacterType()) {
437  CS.setKind(ConversionSpecifier::sArg);
438  if (PT->isWideCharType())
439  LM.setKind(LengthModifier::AsWideChar);
440  else
441  LM.setKind(LengthModifier::None);
442 
443  // If we know the target array length, we can use it as a field width.
444  if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(RawQT)) {
 // NOTE(review): the start of the statement guarded by this check is not
 // visible at this point; its visible arguments pass the array size minus
 // one.
445  if (CAT->getSizeModifier() == ArrayType::Normal)
447  CAT->getSize().getZExtValue() - 1,
448  "", 0, false);
449 
450  }
451  return true;
452  }
453 
454  // Figure out the length modifier.
455  switch (BT->getKind()) {
456  // no modifier
457  case BuiltinType::UInt:
458  case BuiltinType::Int:
459  case BuiltinType::Float:
460  LM.setKind(LengthModifier::None);
461  break;
462 
463  // hh
464  case BuiltinType::Char_U:
465  case BuiltinType::UChar:
466  case BuiltinType::Char_S:
467  case BuiltinType::SChar:
468  LM.setKind(LengthModifier::AsChar);
469  break;
470 
471  // h
472  case BuiltinType::Short:
473  case BuiltinType::UShort:
474  LM.setKind(LengthModifier::AsShort);
475  break;
476 
477  // l
478  case BuiltinType::Long:
479  case BuiltinType::ULong:
480  case BuiltinType::Double:
481  LM.setKind(LengthModifier::AsLong);
482  break;
483 
484  // ll
485  case BuiltinType::LongLong:
486  case BuiltinType::ULongLong:
487  LM.setKind(LengthModifier::AsLongLong);
488  break;
489 
490  // L
491  case BuiltinType::LongDouble:
492  LM.setKind(LengthModifier::AsLongDouble);
493  break;
494 
495  // Don't know.
496  default:
497  return false;
498  }
499 
500  // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
 // NOTE(review): namedTypeToLengthModifier is declared elsewhere; presumably
 // it overrides LM when the typedef name has its own modifier - confirm.
501  if (isa<TypedefType>(PT) && (LangOpt.C99 || LangOpt.CPlusPlus11))
502  namedTypeToLengthModifier(PT, LM);
503 
504  // If fixing the length modifier was enough, we are done.
 // The check re-derives the expected type with the updated LM and compares
 // it against QT.
505  if (hasValidLengthModifier(Ctx.getTargetInfo())) {
506  const analyze_scanf::ArgType &AT = getArgType(Ctx);
507  if (AT.isValid() && AT.matchesType(Ctx, QT))
508  return true;
509  }
510 
511  // Figure out the conversion specifier.
 // Floating-point pointees get fArg, signed integers dArg, unsigned integers
 // uArg.
512  if (PT->isRealFloatingType())
513  CS.setKind(ConversionSpecifier::fArg);
514  else if (PT->isSignedIntegerType())
515  CS.setKind(ConversionSpecifier::dArg);
516  else if (PT->isUnsignedIntegerType())
517  CS.setKind(ConversionSpecifier::uArg);
518  else
519  llvm_unreachable("Unexpected type");
520 
521  return true;
522 }
523 
524 void ScanfSpecifier::toString(raw_ostream &os) const {
525  os << "%";
526 
527  if (usesPositionalArg())
528  os << getPositionalArgIndex() << "$";
529  if (SuppressAssignment)
530  os << "*";
531 
532  FieldWidth.toString(os);
533  os << LM.toString();
534  os << CS.toString();
535 }
536 
// ParseScanfString: drives ParseScanfSpecifier over the whole range [I, E)
// and hands every parsed specifier to H.HandleScanfSpecifier.
//
// I      - current position in the format string.
// E      - end of the format string.
// LO     - language options, forwarded to ParseScanfSpecifier.
// Target - target information, forwarded to ParseScanfSpecifier.
//
// Returns true when parsing stopped early, either because a result reported
// shouldStop() or because H.HandleScanfSpecifier returned false. Returns
// false once the loop has run I all the way to E.
538  const char *I,
539  const char *E,
540  const LangOptions &LO,
541  const TargetInfo &Target) {
542 
 // Running index handed to ParseScanfSpecifier by reference.
543  unsigned argIndex = 0;
544 
545  // Keep looking for a format specifier until we have exhausted the string.
546  while (I != E) {
 // I is passed as ParseScanfSpecifier's by-reference cursor parameter, which
 // is how it advances from one iteration to the next.
547  const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, I, E, argIndex,
548  LO, Target);
549  // Did a fail-stop error of any kind occur when parsing the specifier?
550  // If so, don't do any more processing.
551  if (FSR.shouldStop())
552  return true;
553  // Did we exhaust the string or encounter an error that
554  // we can recover from?
555  if (!FSR.hasValue())
556  continue;
557  // We have a format specifier. Pass it to the callback.
 // The length argument is the distance from the specifier's start to the
 // current cursor.
558  if (!H.HandleScanfSpecifier(FSR.getValue(), FSR.getStart(),
559  I - FSR.getStart())) {
560  return true;
561  }
562  }
563  assert(I == E && "Format string not exhausted");
564  return false;
565 }
clang::analyze_format_string::SpecifierResult< ScanfSpecifier > ScanfSpecifierResult
CanQualType LongLongTy
Definition: ASTContext.h:1025
virtual bool HandleScanfSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:730
A (possibly-)qualified type.
Definition: Type.h:638
QualType getPointerDiffType() const
Return the unique type for "ptrdiff_t" (C99 7.17) defined in <stddef.h>.
const OptionalFlag & getSuppressAssignment() const
Definition: FormatString.h:642
Kind getKind() const
Definition: Type.h:2418
QualType getPointeeType() const
If this is a pointer, ObjC object pointer, or block pointer, this returns the respective pointee...
Definition: Type.cpp:505
const llvm::Triple & getTriple() const
Returns the target triple of the primary target.
Definition: TargetInfo.h:949
bool isRealFloatingType() const
Floating point categories.
Definition: Type.cpp:1937
bool ParseArgPosition(FormatStringHandler &H, FormatSpecifier &CS, const char *Start, const char *&Beg, const char *E)
CanQualType LongTy
Definition: ASTContext.h:1025
const TargetInfo & getTargetInfo() const
Definition: ASTContext.h:690
bool isUnsignedIntegerType() const
Return true if this is an integer type that is unsigned, according to C99 6.2.5p6 [which returns true...
Definition: Type.cpp:1884
const T * getAs() const
Member-template getAs<specific type>'.
Definition: Type.h:6748
bool isWideCharType() const
Definition: Type.cpp:1796
Holds long-lived AST nodes (such as types and decls) that can be referred to throughout the semantic ...
Definition: ASTContext.h:155
static std::string toString(const clang::SanitizerSet &Sanitizers)
Produce a string containing comma-separated names of sanitizers in Sanitizers set.
bool ParseScanfString(FormatStringHandler &H, const char *beg, const char *end, const LangOptions &LO, const TargetInfo &Target)
Keeps track of the various options that can be enabled, which controls the dialect of C or C++ that i...
Definition: LangOptions.h:50
QualType getUnsignedPointerDiffType() const
Return the unique unsigned counterpart of "ptrdiff_t" integer type.
static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H, const char *&Beg, const char *E, unsigned &argIndex, const LangOptions &LO, const TargetInfo &Target)
void setFieldWidth(const OptionalAmount &Amt)
Definition: FormatString.h:434
MatchKind matchesType(ASTContext &C, QualType argTy) const
Represents the length modifier in a format string in scanf/printf.
Definition: FormatString.h:65
CanQualType LongDoubleTy
Definition: ASTContext.h:1028
CanQualType UnsignedCharTy
Definition: ASTContext.h:1026
static ArgType PtrTo(const ArgType &A)
Create an ArgType which corresponds to the type pointer to A.
Definition: FormatString.h:280
Exposes information about the current target.
Definition: TargetInfo.h:54
CanQualType ShortTy
Definition: ASTContext.h:1025
bool isSignedIntegerType() const
Return true if this is an integer type that is signed, according to C99 6.2.5p4 [char, signed char, short, int, long..], or an enum decl which has a signed representation.
Definition: Type.cpp:1844
virtual void HandleNullChar(const char *nullCharacter)
Definition: FormatString.h:682
CanQualType SignedCharTy
Definition: ASTContext.h:1025
CanQualType getUIntMaxType() const
Return the unique type for "uintmax_t" (C99 7.18.1.5), defined in <stdint.h>.
QualType getWideCharType() const
Return the type of wide characters.
Definition: ASTContext.h:1565
Kind
A helper class that allows the use of isa/cast/dyncast to detect TagType objects of enums...
Definition: Type.h:4396
const ConstantArrayType * getAsConstantArrayType(QualType T) const
Definition: ASTContext.h:2413
CanQualType FloatTy
Definition: ASTContext.h:1028
CanQualType getSignedSizeType() const
Return the unique signed counterpart of the integer type corresponding to size_t. ...
void setConversionSpecifier(const ScanfConversionSpecifier &cs)
Definition: FormatString.h:646
bool isAnyCharacterType() const
Determine whether this type is any of the built-in character types.
Definition: Type.cpp:1823
CanQualType UnsignedShortTy
Definition: ASTContext.h:1026
Dataflow Directional Tag Classes.
void setSuppressAssignment(const char *position)
Definition: FormatString.h:638
Pieces specific to fscanf format strings.
Definition: FormatString.h:610
CanQualType UnsignedLongLongTy
Definition: ASTContext.h:1027
static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS, const char *&Beg, const char *E)
bool ParseUTF8InvalidSpecifier(const char *SpecifierBegin, const char *FmtStrEnd, unsigned &Len)
Returns true if the invalid specifier in SpecifierBegin is a UTF-8 string; check that it won't go fur...
Common components of both fprintf and fscanf format strings.
Definition: FormatString.h:30
CanQualType UnsignedLongTy
Definition: ASTContext.h:1026
bool ParseLengthModifier(FormatSpecifier &FS, const char *&Beg, const char *E, const LangOptions &LO, bool IsScanf=false)
Returns true if a LengthModifier was parsed and installed in the FormatSpecifier& argument...
virtual void HandleIncompleteSpecifier(const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:691
This class is used for builtin types like 'int'.
Definition: Type.h:2391
Defines the clang::TargetInfo interface.
OptionalAmount ParseAmount(const char *&Beg, const char *E)
CanQualType IntTy
Definition: ASTContext.h:1025
bool isPointerType() const
Definition: Type.h:6296
CanQualType getIntMaxType() const
Return the unique type for "intmax_t" (C99 7.18.1.5), defined in <stdint.h>.
virtual void HandleIncompleteScanList(const char *start, const char *end)
Definition: FormatString.h:736
CanQualType DoubleTy
Definition: ASTContext.h:1028
virtual bool HandleInvalidScanfConversionSpecifier(const analyze_scanf::ScanfSpecifier &FS, const char *startSpecifier, unsigned specifierLen)
Definition: FormatString.h:723
Represents the canonical version of C arrays with a specified constant size.
Definition: Type.h:2872
CanQualType UnsignedIntTy
Definition: ASTContext.h:1026
CanQualType getSizeType() const
Return the unique type for "size_t" (C99 7.17), defined in <stddef.h>.