//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Handling of format strings in scanf and friends.  The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
11//
12//===----------------------------------------------------------------------===//
13
14#include "clang/AST/FormatString.h"
15#include "FormatStringParsing.h"
16#include "clang/Basic/TargetInfo.h"
17
18using clang::analyze_format_string::ArgType;
19using clang::analyze_format_string::FormatStringHandler;
20using clang::analyze_format_string::LengthModifier;
21using clang::analyze_format_string::OptionalAmount;
22using clang::analyze_format_string::ConversionSpecifier;
23using clang::analyze_scanf::ScanfConversionSpecifier;
24using clang::analyze_scanf::ScanfSpecifier;
25using clang::UpdateOnReturn;
26using namespace clang;
27
28typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29 ScanfSpecifierResult;
30
31static bool ParseScanList(FormatStringHandler &H,
32 ScanfConversionSpecifier &CS,
33 const char *&Beg, const char *E) {
34 const char *I = Beg;
35 const char *start = I - 1;
36 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
37
38 // No more characters?
39 if (I == E) {
40 H.HandleIncompleteScanList(start, end: I);
41 return true;
42 }
43
44 // Special case: ']' is the first character.
45 if (*I == ']') {
46 if (++I == E) {
47 H.HandleIncompleteScanList(start, end: I - 1);
48 return true;
49 }
50 }
51
52 // Special case: "^]" are the first characters.
53 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
54 I += 2;
55 if (I == E) {
56 H.HandleIncompleteScanList(start, end: I - 1);
57 return true;
58 }
59 }
60
61 // Look for a ']' character which denotes the end of the scan list.
62 while (*I != ']') {
63 if (++I == E) {
64 H.HandleIncompleteScanList(start, end: I - 1);
65 return true;
66 }
67 }
68
69 CS.setEndScanList(I);
70 return false;
71}
72
73// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
74// We can possibly refactor.
75static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
76 const char *&Beg,
77 const char *E,
78 unsigned &argIndex,
79 const LangOptions &LO,
80 const TargetInfo &Target) {
81 using namespace clang::analyze_format_string;
82 using namespace clang::analyze_scanf;
83 const char *I = Beg;
84 const char *Start = nullptr;
85 UpdateOnReturn <const char*> UpdateBeg(Beg, I);
86
87 // Look for a '%' character that indicates the start of a format specifier.
88 for ( ; I != E ; ++I) {
89 char c = *I;
90 if (c == '\0') {
91 // Detect spurious null characters, which are likely errors.
92 H.HandleNullChar(nullCharacter: I);
93 return true;
94 }
95 if (c == '%') {
96 Start = I++; // Record the start of the format specifier.
97 break;
98 }
99 }
100
101 // No format specifier found?
102 if (!Start)
103 return false;
104
105 if (I == E) {
106 // No more characters left?
107 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
108 return true;
109 }
110
111 ScanfSpecifier FS;
112 if (ParseArgPosition(H, CS&: FS, Start, Beg&: I, E))
113 return true;
114
115 if (I == E) {
116 // No more characters left?
117 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
118 return true;
119 }
120
121 // Look for '*' flag if it is present.
122 if (*I == '*') {
123 FS.setSuppressAssignment(I);
124 if (++I == E) {
125 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
126 return true;
127 }
128 }
129
130 // Look for the field width (if any). Unlike printf, this is either
131 // a fixed integer or isn't present.
132 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(Beg&: I, E);
133 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
134 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
135 FS.setFieldWidth(Amt);
136
137 if (I == E) {
138 // No more characters left?
139 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
140 return true;
141 }
142 }
143
144 // Look for the length modifier.
145 if (ParseLengthModifier(FS, Beg&: I, E, LO, /*IsScanf=*/true) && I == E) {
146 // No more characters left?
147 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
148 return true;
149 }
150
151 // Detect spurious null characters, which are likely errors.
152 if (*I == '\0') {
153 H.HandleNullChar(nullCharacter: I);
154 return true;
155 }
156
157 // Finally, look for the conversion specifier.
158 const char *conversionPosition = I++;
159 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
160 switch (*conversionPosition) {
161 default:
162 break;
163 case '%': k = ConversionSpecifier::PercentArg; break;
164 case 'b': k = ConversionSpecifier::bArg; break;
165 case 'A': k = ConversionSpecifier::AArg; break;
166 case 'E': k = ConversionSpecifier::EArg; break;
167 case 'F': k = ConversionSpecifier::FArg; break;
168 case 'G': k = ConversionSpecifier::GArg; break;
169 case 'X': k = ConversionSpecifier::XArg; break;
170 case 'a': k = ConversionSpecifier::aArg; break;
171 case 'd': k = ConversionSpecifier::dArg; break;
172 case 'e': k = ConversionSpecifier::eArg; break;
173 case 'f': k = ConversionSpecifier::fArg; break;
174 case 'g': k = ConversionSpecifier::gArg; break;
175 case 'i': k = ConversionSpecifier::iArg; break;
176 case 'n': k = ConversionSpecifier::nArg; break;
177 case 'c': k = ConversionSpecifier::cArg; break;
178 case 'C': k = ConversionSpecifier::CArg; break;
179 case 'S': k = ConversionSpecifier::SArg; break;
180 case '[': k = ConversionSpecifier::ScanListArg; break;
181 case 'u': k = ConversionSpecifier::uArg; break;
182 case 'x': k = ConversionSpecifier::xArg; break;
183 case 'o': k = ConversionSpecifier::oArg; break;
184 case 's': k = ConversionSpecifier::sArg; break;
185 case 'p': k = ConversionSpecifier::pArg; break;
186 // Apple extensions
187 // Apple-specific
188 case 'D':
189 if (Target.getTriple().isOSDarwin())
190 k = ConversionSpecifier::DArg;
191 break;
192 case 'O':
193 if (Target.getTriple().isOSDarwin())
194 k = ConversionSpecifier::OArg;
195 break;
196 case 'U':
197 if (Target.getTriple().isOSDarwin())
198 k = ConversionSpecifier::UArg;
199 break;
200 }
201 ScanfConversionSpecifier CS(conversionPosition, k);
202 if (k == ScanfConversionSpecifier::ScanListArg) {
203 if (ParseScanList(H, CS, Beg&: I, E))
204 return true;
205 }
206 FS.setConversionSpecifier(CS);
207 if (CS.consumesDataArgument() && !FS.getSuppressAssignment()
208 && !FS.usesPositionalArg())
209 FS.setArgIndex(argIndex++);
210
211 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
212 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
213
214 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
215 unsigned Len = I - Beg;
216 if (ParseUTF8InvalidSpecifier(SpecifierBegin: Beg, FmtStrEnd: E, Len)) {
217 CS.setEndScanList(Beg + Len);
218 FS.setConversionSpecifier(CS);
219 }
220 // Assume the conversion takes one argument.
221 return !H.HandleInvalidScanfConversionSpecifier(FS, startSpecifier: Beg, specifierLen: Len);
222 }
223 return ScanfSpecifierResult(Start, FS);
224}
225
226ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
227 const ScanfConversionSpecifier &CS = getConversionSpecifier();
228
229 if (!CS.consumesDataArgument())
230 return ArgType::Invalid();
231
232 switch(CS.getKind()) {
233 // Signed int.
234 case ConversionSpecifier::dArg:
235 case ConversionSpecifier::DArg:
236 case ConversionSpecifier::iArg:
237 switch (LM.getKind()) {
238 case LengthModifier::None:
239 return ArgType::PtrTo(A: Ctx.IntTy);
240 case LengthModifier::AsChar:
241 return ArgType::PtrTo(A: ArgType::AnyCharTy);
242 case LengthModifier::AsShort:
243 return ArgType::PtrTo(A: Ctx.ShortTy);
244 case LengthModifier::AsLong:
245 return ArgType::PtrTo(A: Ctx.LongTy);
246 case LengthModifier::AsLongLong:
247 case LengthModifier::AsQuad:
248 return ArgType::PtrTo(A: Ctx.LongLongTy);
249 case LengthModifier::AsInt64:
250 return ArgType::PtrTo(A: ArgType(Ctx.LongLongTy, "__int64"));
251 case LengthModifier::AsIntMax:
252 return ArgType::PtrTo(A: ArgType(Ctx.getIntMaxType(), "intmax_t"));
253 case LengthModifier::AsSizeT:
254 return ArgType::PtrTo(A: ArgType::makeSizeT(
255 A: ArgType(Ctx.getSignedSizeType(), "signed size_t")));
256 case LengthModifier::AsPtrDiff:
257 return ArgType::PtrTo(A: ArgType::makePtrdiffT(
258 A: ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")));
259 case LengthModifier::AsLongDouble:
260 // GNU extension.
261 return ArgType::PtrTo(A: Ctx.LongLongTy);
262 case LengthModifier::AsAllocate:
263 case LengthModifier::AsMAllocate:
264 case LengthModifier::AsInt32:
265 case LengthModifier::AsInt3264:
266 case LengthModifier::AsWide:
267 case LengthModifier::AsShortLong:
268 return ArgType::Invalid();
269 }
270 llvm_unreachable("Unsupported LengthModifier Type");
271
272 // Unsigned int.
273 case ConversionSpecifier::bArg:
274 case ConversionSpecifier::oArg:
275 case ConversionSpecifier::OArg:
276 case ConversionSpecifier::uArg:
277 case ConversionSpecifier::UArg:
278 case ConversionSpecifier::xArg:
279 case ConversionSpecifier::XArg:
280 switch (LM.getKind()) {
281 case LengthModifier::None:
282 return ArgType::PtrTo(A: Ctx.UnsignedIntTy);
283 case LengthModifier::AsChar:
284 return ArgType::PtrTo(A: Ctx.UnsignedCharTy);
285 case LengthModifier::AsShort:
286 return ArgType::PtrTo(A: Ctx.UnsignedShortTy);
287 case LengthModifier::AsLong:
288 return ArgType::PtrTo(A: Ctx.UnsignedLongTy);
289 case LengthModifier::AsLongLong:
290 case LengthModifier::AsQuad:
291 return ArgType::PtrTo(A: Ctx.UnsignedLongLongTy);
292 case LengthModifier::AsInt64:
293 return ArgType::PtrTo(A: ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
294 case LengthModifier::AsIntMax:
295 return ArgType::PtrTo(A: ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
296 case LengthModifier::AsSizeT:
297 return ArgType::PtrTo(
298 A: ArgType::makeSizeT(A: ArgType(Ctx.getSizeType(), "size_t")));
299 case LengthModifier::AsPtrDiff:
300 return ArgType::PtrTo(A: ArgType::makePtrdiffT(
301 A: ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")));
302 case LengthModifier::AsLongDouble:
303 // GNU extension.
304 return ArgType::PtrTo(A: Ctx.UnsignedLongLongTy);
305 case LengthModifier::AsAllocate:
306 case LengthModifier::AsMAllocate:
307 case LengthModifier::AsInt32:
308 case LengthModifier::AsInt3264:
309 case LengthModifier::AsWide:
310 case LengthModifier::AsShortLong:
311 return ArgType::Invalid();
312 }
313 llvm_unreachable("Unsupported LengthModifier Type");
314
315 // Float.
316 case ConversionSpecifier::aArg:
317 case ConversionSpecifier::AArg:
318 case ConversionSpecifier::eArg:
319 case ConversionSpecifier::EArg:
320 case ConversionSpecifier::fArg:
321 case ConversionSpecifier::FArg:
322 case ConversionSpecifier::gArg:
323 case ConversionSpecifier::GArg:
324 switch (LM.getKind()) {
325 case LengthModifier::None:
326 return ArgType::PtrTo(A: Ctx.FloatTy);
327 case LengthModifier::AsLong:
328 return ArgType::PtrTo(A: Ctx.DoubleTy);
329 case LengthModifier::AsLongDouble:
330 return ArgType::PtrTo(A: Ctx.LongDoubleTy);
331 default:
332 return ArgType::Invalid();
333 }
334
335 // Char, string and scanlist.
336 case ConversionSpecifier::cArg:
337 case ConversionSpecifier::sArg:
338 case ConversionSpecifier::ScanListArg:
339 switch (LM.getKind()) {
340 case LengthModifier::None:
341 return ArgType::PtrTo(A: ArgType::AnyCharTy);
342 case LengthModifier::AsLong:
343 case LengthModifier::AsWide:
344 return ArgType::PtrTo(A: ArgType(Ctx.getWideCharType(), "wchar_t"));
345 case LengthModifier::AsAllocate:
346 case LengthModifier::AsMAllocate:
347 return ArgType::PtrTo(A: ArgType::CStrTy);
348 case LengthModifier::AsShort:
349 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
350 return ArgType::PtrTo(A: ArgType::AnyCharTy);
351 [[fallthrough]];
352 default:
353 return ArgType::Invalid();
354 }
355 case ConversionSpecifier::CArg:
356 case ConversionSpecifier::SArg:
357 // FIXME: Mac OS X specific?
358 switch (LM.getKind()) {
359 case LengthModifier::None:
360 case LengthModifier::AsWide:
361 return ArgType::PtrTo(A: ArgType(Ctx.getWideCharType(), "wchar_t"));
362 case LengthModifier::AsAllocate:
363 case LengthModifier::AsMAllocate:
364 return ArgType::PtrTo(A: ArgType(ArgType::WCStrTy, "wchar_t *"));
365 case LengthModifier::AsShort:
366 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
367 return ArgType::PtrTo(A: ArgType::AnyCharTy);
368 [[fallthrough]];
369 default:
370 return ArgType::Invalid();
371 }
372
373 // Pointer.
374 case ConversionSpecifier::pArg:
375 return ArgType::PtrTo(A: ArgType::CPointerTy);
376
377 // Write-back.
378 case ConversionSpecifier::nArg:
379 switch (LM.getKind()) {
380 case LengthModifier::None:
381 return ArgType::PtrTo(A: Ctx.IntTy);
382 case LengthModifier::AsChar:
383 return ArgType::PtrTo(A: Ctx.SignedCharTy);
384 case LengthModifier::AsShort:
385 return ArgType::PtrTo(A: Ctx.ShortTy);
386 case LengthModifier::AsLong:
387 return ArgType::PtrTo(A: Ctx.LongTy);
388 case LengthModifier::AsLongLong:
389 case LengthModifier::AsQuad:
390 return ArgType::PtrTo(A: Ctx.LongLongTy);
391 case LengthModifier::AsInt64:
392 return ArgType::PtrTo(A: ArgType(Ctx.LongLongTy, "__int64"));
393 case LengthModifier::AsIntMax:
394 return ArgType::PtrTo(A: ArgType(Ctx.getIntMaxType(), "intmax_t"));
395 case LengthModifier::AsSizeT:
396 return ArgType::PtrTo(A: ArgType::makeSizeT(
397 A: ArgType(Ctx.getSignedSizeType(), "signed size_t")));
398 case LengthModifier::AsPtrDiff:
399 return ArgType::PtrTo(A: ArgType::makePtrdiffT(
400 A: ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")));
401 case LengthModifier::AsLongDouble:
402 return ArgType(); // FIXME: Is this a known extension?
403 case LengthModifier::AsAllocate:
404 case LengthModifier::AsMAllocate:
405 case LengthModifier::AsInt32:
406 case LengthModifier::AsInt3264:
407 case LengthModifier::AsWide:
408 case LengthModifier::AsShortLong:
409 return ArgType::Invalid();
410 }
411
412 default:
413 break;
414 }
415
416 return ArgType();
417}
418
419bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
420 const LangOptions &LangOpt,
421 ASTContext &Ctx) {
422
423 // %n is different from other conversion specifiers; don't try to fix it.
424 if (CS.getKind() == ConversionSpecifier::nArg)
425 return false;
426
427 if (!QT->isPointerType())
428 return false;
429
430 QualType PT = QT->getPointeeType();
431
432 // If it's an enum, get its underlying type.
433 if (const auto *ED = PT->getAsEnumDecl()) {
434 // Don't try to fix incomplete enums.
435 if (!ED->isComplete())
436 return false;
437 PT = ED->getIntegerType();
438 }
439
440 const BuiltinType *BT = PT->getAs<BuiltinType>();
441 if (!BT)
442 return false;
443
444 // Pointer to a character.
445 if (PT->isAnyCharacterType()) {
446 CS.setKind(ConversionSpecifier::sArg);
447 if (PT->isWideCharType())
448 LM.setKind(LengthModifier::AsWideChar);
449 else
450 LM.setKind(LengthModifier::None);
451
452 // If we know the target array length, we can use it as a field width.
453 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(T: RawQT)) {
454 if (CAT->getSizeModifier() == ArraySizeModifier::Normal)
455 FieldWidth = OptionalAmount(OptionalAmount::Constant,
456 CAT->getZExtSize() - 1, "", 0, false);
457 }
458 return true;
459 }
460
461 // Figure out the length modifier.
462 switch (BT->getKind()) {
463 // no modifier
464 case BuiltinType::UInt:
465 case BuiltinType::Int:
466 case BuiltinType::Float:
467 LM.setKind(LengthModifier::None);
468 break;
469
470 // hh
471 case BuiltinType::Char_U:
472 case BuiltinType::UChar:
473 case BuiltinType::Char_S:
474 case BuiltinType::SChar:
475 LM.setKind(LengthModifier::AsChar);
476 break;
477
478 // h
479 case BuiltinType::Short:
480 case BuiltinType::UShort:
481 LM.setKind(LengthModifier::AsShort);
482 break;
483
484 // l
485 case BuiltinType::Long:
486 case BuiltinType::ULong:
487 case BuiltinType::Double:
488 LM.setKind(LengthModifier::AsLong);
489 break;
490
491 // ll
492 case BuiltinType::LongLong:
493 case BuiltinType::ULongLong:
494 LM.setKind(LengthModifier::AsLongLong);
495 break;
496
497 // L
498 case BuiltinType::LongDouble:
499 LM.setKind(LengthModifier::AsLongDouble);
500 break;
501
502 // Don't know.
503 default:
504 return false;
505 }
506
507 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
508 if (LangOpt.C99 || LangOpt.CPlusPlus11)
509 namedTypeToLengthModifier(Ctx, QT: PT, LM);
510
511 // If fixing the length modifier was enough, we are done.
512 if (hasValidLengthModifier(Target: Ctx.getTargetInfo(), LO: LangOpt)) {
513 const analyze_scanf::ArgType &AT = getArgType(Ctx);
514 if (AT.isValid() && AT.matchesType(C&: Ctx, argTy: QT))
515 return true;
516 }
517
518 // Figure out the conversion specifier.
519 if (PT->isRealFloatingType())
520 CS.setKind(ConversionSpecifier::fArg);
521 else if (PT->isSignedIntegerType())
522 CS.setKind(ConversionSpecifier::dArg);
523 else if (PT->isUnsignedIntegerType())
524 CS.setKind(ConversionSpecifier::uArg);
525 else
526 llvm_unreachable("Unexpected type");
527
528 return true;
529}
530
531void ScanfSpecifier::toString(raw_ostream &os) const {
532 os << "%";
533
534 if (usesPositionalArg())
535 os << getPositionalArgIndex() << "$";
536 if (SuppressAssignment)
537 os << "*";
538
539 FieldWidth.toString(os);
540 os << LM.toString();
541 os << CS.toString();
542}
543
544bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
545 const char *I,
546 const char *E,
547 const LangOptions &LO,
548 const TargetInfo &Target) {
549
550 unsigned argIndex = 0;
551
552 // Keep looking for a format specifier until we have exhausted the string.
553 while (I != E) {
554 const ScanfSpecifierResult &FSR = ParseScanfSpecifier(H, Beg&: I, E, argIndex,
555 LO, Target);
556 // Did a fail-stop error of any kind occur when parsing the specifier?
557 // If so, don't do any more processing.
558 if (FSR.shouldStop())
559 return true;
560 // Did we exhaust the string or encounter an error that
561 // we can recover from?
562 if (!FSR.hasValue())
563 continue;
564 // We have a format specifier. Pass it to the callback.
565 if (!H.HandleScanfSpecifier(FS: FSR.getValue(), startSpecifier: FSR.getStart(),
566 specifierLen: I - FSR.getStart())) {
567 return true;
568 }
569 }
570 assert(I == E && "Format string not exhausted");
571 return false;
572}
573