//= ScanfFormatString.cpp - Analysis of scanf format strings ---*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
// Handling of format strings in scanf and friends. The structure of format
// strings for fscanf() is described in C99 7.19.6.2.
11//
12//===----------------------------------------------------------------------===//
13
14#include "FormatStringParsing.h"
15#include "clang/AST/FormatString.h"
16#include "clang/Basic/TargetInfo.h"
17
18using clang::UpdateOnReturn;
19using clang::analyze_format_string::ArgType;
20using clang::analyze_format_string::ConversionSpecifier;
21using clang::analyze_format_string::FormatStringHandler;
22using clang::analyze_format_string::LengthModifier;
23using clang::analyze_format_string::OptionalAmount;
24using clang::analyze_scanf::ScanfConversionSpecifier;
25using clang::analyze_scanf::ScanfSpecifier;
26using namespace clang;
27
28typedef clang::analyze_format_string::SpecifierResult<ScanfSpecifier>
29 ScanfSpecifierResult;
30
31static bool ParseScanList(FormatStringHandler &H, ScanfConversionSpecifier &CS,
32 const char *&Beg, const char *E) {
33 const char *I = Beg;
34 const char *start = I - 1;
35 UpdateOnReturn<const char *> UpdateBeg(Beg, I);
36
37 // No more characters?
38 if (I == E) {
39 H.HandleIncompleteScanList(start, end: I);
40 return true;
41 }
42
43 // Special case: ']' is the first character.
44 if (*I == ']') {
45 if (++I == E) {
46 H.HandleIncompleteScanList(start, end: I - 1);
47 return true;
48 }
49 }
50
51 // Special case: "^]" are the first characters.
52 if (I + 1 != E && I[0] == '^' && I[1] == ']') {
53 I += 2;
54 if (I == E) {
55 H.HandleIncompleteScanList(start, end: I - 1);
56 return true;
57 }
58 }
59
60 // Look for a ']' character which denotes the end of the scan list.
61 while (*I != ']') {
62 if (++I == E) {
63 H.HandleIncompleteScanList(start, end: I - 1);
64 return true;
65 }
66 }
67
68 CS.setEndScanList(I);
69 return false;
70}
71
72// FIXME: Much of this is copy-paste from ParsePrintfSpecifier.
73// We can possibly refactor.
74static ScanfSpecifierResult ParseScanfSpecifier(FormatStringHandler &H,
75 const char *&Beg, const char *E,
76 unsigned &argIndex,
77 const LangOptions &LO,
78 const TargetInfo &Target) {
79 using namespace clang::analyze_format_string;
80 using namespace clang::analyze_scanf;
81 const char *I = Beg;
82 const char *Start = nullptr;
83 UpdateOnReturn<const char *> UpdateBeg(Beg, I);
84
85 // Look for a '%' character that indicates the start of a format specifier.
86 for (; I != E; ++I) {
87 char c = *I;
88 if (c == '\0') {
89 // Detect spurious null characters, which are likely errors.
90 H.HandleNullChar(nullCharacter: I);
91 return true;
92 }
93 if (c == '%') {
94 Start = I++; // Record the start of the format specifier.
95 break;
96 }
97 }
98
99 // No format specifier found?
100 if (!Start)
101 return false;
102
103 if (I == E) {
104 // No more characters left?
105 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
106 return true;
107 }
108
109 ScanfSpecifier FS;
110 if (ParseArgPosition(H, CS&: FS, Start, Beg&: I, E))
111 return true;
112
113 if (I == E) {
114 // No more characters left?
115 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
116 return true;
117 }
118
119 // Look for '*' flag if it is present.
120 if (*I == '*') {
121 FS.setSuppressAssignment(I);
122 if (++I == E) {
123 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
124 return true;
125 }
126 }
127
128 // Look for the field width (if any). Unlike printf, this is either
129 // a fixed integer or isn't present.
130 const OptionalAmount &Amt = clang::analyze_format_string::ParseAmount(Beg&: I, E);
131 if (Amt.getHowSpecified() != OptionalAmount::NotSpecified) {
132 assert(Amt.getHowSpecified() == OptionalAmount::Constant);
133 FS.setFieldWidth(Amt);
134
135 if (I == E) {
136 // No more characters left?
137 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
138 return true;
139 }
140 }
141
142 // Look for the length modifier.
143 if (ParseLengthModifier(FS, Beg&: I, E, LO, /*IsScanf=*/true) && I == E) {
144 // No more characters left?
145 H.HandleIncompleteSpecifier(startSpecifier: Start, specifierLen: E - Start);
146 return true;
147 }
148
149 // Detect spurious null characters, which are likely errors.
150 if (*I == '\0') {
151 H.HandleNullChar(nullCharacter: I);
152 return true;
153 }
154
155 // Finally, look for the conversion specifier.
156 const char *conversionPosition = I++;
157 ScanfConversionSpecifier::Kind k = ScanfConversionSpecifier::InvalidSpecifier;
158 switch (*conversionPosition) {
159 default:
160 break;
161 case '%':
162 k = ConversionSpecifier::PercentArg;
163 break;
164 case 'b':
165 k = ConversionSpecifier::bArg;
166 break;
167 case 'A':
168 k = ConversionSpecifier::AArg;
169 break;
170 case 'E':
171 k = ConversionSpecifier::EArg;
172 break;
173 case 'F':
174 k = ConversionSpecifier::FArg;
175 break;
176 case 'G':
177 k = ConversionSpecifier::GArg;
178 break;
179 case 'X':
180 k = ConversionSpecifier::XArg;
181 break;
182 case 'a':
183 k = ConversionSpecifier::aArg;
184 break;
185 case 'd':
186 k = ConversionSpecifier::dArg;
187 break;
188 case 'e':
189 k = ConversionSpecifier::eArg;
190 break;
191 case 'f':
192 k = ConversionSpecifier::fArg;
193 break;
194 case 'g':
195 k = ConversionSpecifier::gArg;
196 break;
197 case 'i':
198 k = ConversionSpecifier::iArg;
199 break;
200 case 'n':
201 k = ConversionSpecifier::nArg;
202 break;
203 case 'c':
204 k = ConversionSpecifier::cArg;
205 break;
206 case 'C':
207 k = ConversionSpecifier::CArg;
208 break;
209 case 'S':
210 k = ConversionSpecifier::SArg;
211 break;
212 case '[':
213 k = ConversionSpecifier::ScanListArg;
214 break;
215 case 'u':
216 k = ConversionSpecifier::uArg;
217 break;
218 case 'x':
219 k = ConversionSpecifier::xArg;
220 break;
221 case 'o':
222 k = ConversionSpecifier::oArg;
223 break;
224 case 's':
225 k = ConversionSpecifier::sArg;
226 break;
227 case 'p':
228 k = ConversionSpecifier::pArg;
229 break;
230 // Apple extensions
231 // Apple-specific
232 case 'D':
233 if (Target.getTriple().isOSDarwin())
234 k = ConversionSpecifier::DArg;
235 break;
236 case 'O':
237 if (Target.getTriple().isOSDarwin())
238 k = ConversionSpecifier::OArg;
239 break;
240 case 'U':
241 if (Target.getTriple().isOSDarwin())
242 k = ConversionSpecifier::UArg;
243 break;
244 }
245 ScanfConversionSpecifier CS(conversionPosition, k);
246 if (k == ScanfConversionSpecifier::ScanListArg) {
247 if (ParseScanList(H, CS, Beg&: I, E))
248 return true;
249 }
250 FS.setConversionSpecifier(CS);
251 if (CS.consumesDataArgument() && !FS.getSuppressAssignment() &&
252 !FS.usesPositionalArg())
253 FS.setArgIndex(argIndex++);
254
255 // FIXME: '%' and '*' doesn't make sense. Issue a warning.
256 // FIXME: 'ConsumedSoFar' and '*' doesn't make sense.
257
258 if (k == ScanfConversionSpecifier::InvalidSpecifier) {
259 unsigned Len = I - Beg;
260 if (ParseUTF8InvalidSpecifier(SpecifierBegin: Beg, FmtStrEnd: E, Len)) {
261 CS.setEndScanList(Beg + Len);
262 FS.setConversionSpecifier(CS);
263 }
264 // Assume the conversion takes one argument.
265 return !H.HandleInvalidScanfConversionSpecifier(FS, startSpecifier: Beg, specifierLen: Len);
266 }
267 return ScanfSpecifierResult(Start, FS);
268}
269
270ArgType ScanfSpecifier::getArgType(ASTContext &Ctx) const {
271 const ScanfConversionSpecifier &CS = getConversionSpecifier();
272
273 if (!CS.consumesDataArgument())
274 return ArgType::Invalid();
275
276 switch (CS.getKind()) {
277 // Signed int.
278 case ConversionSpecifier::dArg:
279 case ConversionSpecifier::DArg:
280 case ConversionSpecifier::iArg:
281 switch (LM.getKind()) {
282 case LengthModifier::None:
283 return ArgType::PtrTo(A: Ctx.IntTy);
284 case LengthModifier::AsChar:
285 return ArgType::PtrTo(A: ArgType::AnyCharTy);
286 case LengthModifier::AsShort:
287 return ArgType::PtrTo(A: Ctx.ShortTy);
288 case LengthModifier::AsLong:
289 return ArgType::PtrTo(A: Ctx.LongTy);
290 case LengthModifier::AsLongLong:
291 case LengthModifier::AsQuad:
292 return ArgType::PtrTo(A: Ctx.LongLongTy);
293 case LengthModifier::AsInt64:
294 return ArgType::PtrTo(A: ArgType(Ctx.LongLongTy, "__int64"));
295 case LengthModifier::AsIntMax:
296 return ArgType::PtrTo(A: ArgType(Ctx.getIntMaxType(), "intmax_t"));
297 case LengthModifier::AsSizeT:
298 return ArgType::PtrTo(A: ArgType::makeSizeT(
299 A: ArgType(Ctx.getSignedSizeType(), "signed size_t")));
300 case LengthModifier::AsPtrDiff:
301 return ArgType::PtrTo(A: ArgType::makePtrdiffT(
302 A: ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")));
303 case LengthModifier::AsIntN:
304 case LengthModifier::AsFastIntN:
305 return ArgType::PtrTo(A: ArgType::makeIntNType(Ctx, LengthMod: LM, /*Signed=*/true));
306 case LengthModifier::AsLongDouble:
307 // GNU extension.
308 return ArgType::PtrTo(A: Ctx.LongLongTy);
309 case LengthModifier::AsAllocate:
310 case LengthModifier::AsMAllocate:
311 case LengthModifier::AsInt32:
312 case LengthModifier::AsInt3264:
313 case LengthModifier::AsWide:
314 case LengthModifier::AsShortLong:
315 case LengthModifier::AsDecimal32:
316 case LengthModifier::AsDecimal64:
317 case LengthModifier::AsDecimal128:
318 return ArgType::Invalid();
319 }
320 llvm_unreachable("Unsupported LengthModifier Type");
321
322 // Unsigned int.
323 case ConversionSpecifier::bArg:
324 case ConversionSpecifier::oArg:
325 case ConversionSpecifier::OArg:
326 case ConversionSpecifier::uArg:
327 case ConversionSpecifier::UArg:
328 case ConversionSpecifier::xArg:
329 case ConversionSpecifier::XArg:
330 switch (LM.getKind()) {
331 case LengthModifier::None:
332 return ArgType::PtrTo(A: Ctx.UnsignedIntTy);
333 case LengthModifier::AsChar:
334 return ArgType::PtrTo(A: Ctx.UnsignedCharTy);
335 case LengthModifier::AsShort:
336 return ArgType::PtrTo(A: Ctx.UnsignedShortTy);
337 case LengthModifier::AsLong:
338 return ArgType::PtrTo(A: Ctx.UnsignedLongTy);
339 case LengthModifier::AsLongLong:
340 case LengthModifier::AsQuad:
341 return ArgType::PtrTo(A: Ctx.UnsignedLongLongTy);
342 case LengthModifier::AsInt64:
343 return ArgType::PtrTo(
344 A: ArgType(Ctx.UnsignedLongLongTy, "unsigned __int64"));
345 case LengthModifier::AsIntMax:
346 return ArgType::PtrTo(A: ArgType(Ctx.getUIntMaxType(), "uintmax_t"));
347 case LengthModifier::AsSizeT:
348 return ArgType::PtrTo(
349 A: ArgType::makeSizeT(A: ArgType(Ctx.getSizeType(), "size_t")));
350 case LengthModifier::AsPtrDiff:
351 return ArgType::PtrTo(A: ArgType::makePtrdiffT(
352 A: ArgType(Ctx.getUnsignedPointerDiffType(), "unsigned ptrdiff_t")));
353 case LengthModifier::AsIntN:
354 case LengthModifier::AsFastIntN:
355 return ArgType::PtrTo(A: ArgType::makeIntNType(Ctx, LengthMod: LM, /*Signed=*/false));
356 case LengthModifier::AsLongDouble:
357 // GNU extension.
358 return ArgType::PtrTo(A: Ctx.UnsignedLongLongTy);
359 case LengthModifier::AsAllocate:
360 case LengthModifier::AsMAllocate:
361 case LengthModifier::AsInt32:
362 case LengthModifier::AsInt3264:
363 case LengthModifier::AsWide:
364 case LengthModifier::AsShortLong:
365 case LengthModifier::AsDecimal32:
366 case LengthModifier::AsDecimal64:
367 case LengthModifier::AsDecimal128:
368 return ArgType::Invalid();
369 }
370 llvm_unreachable("Unsupported LengthModifier Type");
371
372 // Float.
373 case ConversionSpecifier::aArg:
374 case ConversionSpecifier::AArg:
375 case ConversionSpecifier::eArg:
376 case ConversionSpecifier::EArg:
377 case ConversionSpecifier::fArg:
378 case ConversionSpecifier::FArg:
379 case ConversionSpecifier::gArg:
380 case ConversionSpecifier::GArg:
381 switch (LM.getKind()) {
382 case LengthModifier::None:
383 return ArgType::PtrTo(A: Ctx.FloatTy);
384 case LengthModifier::AsLong:
385 return ArgType::PtrTo(A: Ctx.DoubleTy);
386 case LengthModifier::AsLongDouble:
387 return ArgType::PtrTo(A: Ctx.LongDoubleTy);
388 case LengthModifier::AsDecimal32:
389 return ArgType::PtrTo(A: ArgType::Unsupported(N: "_Decimal32"));
390 case LengthModifier::AsDecimal64:
391 return ArgType::PtrTo(A: ArgType::Unsupported(N: "_Decimal64"));
392 case LengthModifier::AsDecimal128:
393 return ArgType::PtrTo(A: ArgType::Unsupported(N: "_Decimal128"));
394 default:
395 return ArgType::Invalid();
396 }
397
398 // Char, string and scanlist.
399 case ConversionSpecifier::cArg:
400 case ConversionSpecifier::sArg:
401 case ConversionSpecifier::ScanListArg:
402 switch (LM.getKind()) {
403 case LengthModifier::None:
404 return ArgType::PtrTo(A: ArgType::AnyCharTy);
405 case LengthModifier::AsLong:
406 case LengthModifier::AsWide:
407 return ArgType::PtrTo(A: ArgType(Ctx.getWideCharType(), "wchar_t"));
408 case LengthModifier::AsAllocate:
409 case LengthModifier::AsMAllocate:
410 return ArgType::PtrTo(A: ArgType::CStrTy);
411 case LengthModifier::AsShort:
412 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
413 return ArgType::PtrTo(A: ArgType::AnyCharTy);
414 [[fallthrough]];
415 default:
416 return ArgType::Invalid();
417 }
418 case ConversionSpecifier::CArg:
419 case ConversionSpecifier::SArg:
420 // FIXME: Mac OS X specific?
421 switch (LM.getKind()) {
422 case LengthModifier::None:
423 case LengthModifier::AsWide:
424 return ArgType::PtrTo(A: ArgType(Ctx.getWideCharType(), "wchar_t"));
425 case LengthModifier::AsAllocate:
426 case LengthModifier::AsMAllocate:
427 return ArgType::PtrTo(A: ArgType(ArgType::WCStrTy, "wchar_t *"));
428 case LengthModifier::AsShort:
429 if (Ctx.getTargetInfo().getTriple().isOSMSVCRT())
430 return ArgType::PtrTo(A: ArgType::AnyCharTy);
431 [[fallthrough]];
432 default:
433 return ArgType::Invalid();
434 }
435
436 // Pointer.
437 case ConversionSpecifier::pArg:
438 return ArgType::PtrTo(A: ArgType::CPointerTy);
439
440 // Write-back.
441 case ConversionSpecifier::nArg:
442 switch (LM.getKind()) {
443 case LengthModifier::None:
444 return ArgType::PtrTo(A: Ctx.IntTy);
445 case LengthModifier::AsChar:
446 return ArgType::PtrTo(A: Ctx.SignedCharTy);
447 case LengthModifier::AsShort:
448 return ArgType::PtrTo(A: Ctx.ShortTy);
449 case LengthModifier::AsLong:
450 return ArgType::PtrTo(A: Ctx.LongTy);
451 case LengthModifier::AsLongLong:
452 case LengthModifier::AsQuad:
453 return ArgType::PtrTo(A: Ctx.LongLongTy);
454 case LengthModifier::AsInt64:
455 return ArgType::PtrTo(A: ArgType(Ctx.LongLongTy, "__int64"));
456 case LengthModifier::AsIntMax:
457 return ArgType::PtrTo(A: ArgType(Ctx.getIntMaxType(), "intmax_t"));
458 case LengthModifier::AsSizeT:
459 return ArgType::PtrTo(A: ArgType::makeSizeT(
460 A: ArgType(Ctx.getSignedSizeType(), "signed size_t")));
461 case LengthModifier::AsPtrDiff:
462 return ArgType::PtrTo(A: ArgType::makePtrdiffT(
463 A: ArgType(Ctx.getPointerDiffType(), "ptrdiff_t")));
464 case LengthModifier::AsIntN:
465 case LengthModifier::AsFastIntN:
466 return ArgType::PtrTo(A: ArgType::makeIntNType(Ctx, LengthMod: LM, /*Signed=*/true));
467 case LengthModifier::AsLongDouble:
468 return ArgType(); // FIXME: Is this a known extension?
469 case LengthModifier::AsAllocate:
470 case LengthModifier::AsMAllocate:
471 case LengthModifier::AsInt32:
472 case LengthModifier::AsInt3264:
473 case LengthModifier::AsWide:
474 case LengthModifier::AsShortLong:
475 case LengthModifier::AsDecimal32:
476 case LengthModifier::AsDecimal64:
477 case LengthModifier::AsDecimal128:
478 return ArgType::Invalid();
479 }
480
481 default:
482 break;
483 }
484
485 return ArgType();
486}
487
488bool ScanfSpecifier::fixType(QualType QT, QualType RawQT,
489 const LangOptions &LangOpt, ASTContext &Ctx) {
490
491 // %n is different from other conversion specifiers; don't try to fix it.
492 if (CS.getKind() == ConversionSpecifier::nArg)
493 return false;
494
495 if (!QT->isPointerType())
496 return false;
497
498 QualType PT = QT->getPointeeType();
499
500 // If it's an enum, get its underlying type.
501 if (const auto *ED = PT->getAsEnumDecl()) {
502 // Don't try to fix incomplete enums.
503 if (!ED->isComplete())
504 return false;
505 PT = ED->getIntegerType();
506 }
507
508 const BuiltinType *BT = PT->getAs<BuiltinType>();
509 if (!BT)
510 return false;
511
512 // Pointer to a character.
513 if (PT->isAnyCharacterType()) {
514 CS.setKind(ConversionSpecifier::sArg);
515 if (PT->isWideCharType())
516 LM.setKind(LengthModifier::AsWideChar);
517 else
518 LM.setKind(LengthModifier::None);
519
520 // If we know the target array length, we can use it as a field width.
521 if (const ConstantArrayType *CAT = Ctx.getAsConstantArrayType(T: RawQT)) {
522 if (CAT->getSizeModifier() == ArraySizeModifier::Normal)
523 FieldWidth = OptionalAmount(OptionalAmount::Constant,
524 CAT->getZExtSize() - 1, "", 0, false);
525 }
526 return true;
527 }
528
529 // Figure out the length modifier.
530 switch (BT->getKind()) {
531 // no modifier
532 case BuiltinType::UInt:
533 case BuiltinType::Int:
534 case BuiltinType::Float:
535 LM.setKind(LengthModifier::None);
536 break;
537
538 // hh
539 case BuiltinType::Char_U:
540 case BuiltinType::UChar:
541 case BuiltinType::Char_S:
542 case BuiltinType::SChar:
543 LM.setKind(LengthModifier::AsChar);
544 break;
545
546 // h
547 case BuiltinType::Short:
548 case BuiltinType::UShort:
549 LM.setKind(LengthModifier::AsShort);
550 break;
551
552 // l
553 case BuiltinType::Long:
554 case BuiltinType::ULong:
555 case BuiltinType::Double:
556 LM.setKind(LengthModifier::AsLong);
557 break;
558
559 // ll
560 case BuiltinType::LongLong:
561 case BuiltinType::ULongLong:
562 LM.setKind(LengthModifier::AsLongLong);
563 break;
564
565 // L
566 case BuiltinType::LongDouble:
567 LM.setKind(LengthModifier::AsLongDouble);
568 break;
569
570 // Don't know.
571 default:
572 return false;
573 }
574
575 // Handle size_t, ptrdiff_t, etc. that have dedicated length modifiers in C99.
576 if (LangOpt.C99 || LangOpt.CPlusPlus11)
577 namedTypeToLengthModifier(Ctx, QT: PT, LM);
578
579 // If fixing the length modifier was enough, we are done.
580 if (hasValidLengthModifier(Target: Ctx.getTargetInfo(), LO: LangOpt)) {
581 const analyze_scanf::ArgType &AT = getArgType(Ctx);
582 if (AT.isValid() && AT.matchesType(C&: Ctx, argTy: QT))
583 return true;
584 }
585
586 // Figure out the conversion specifier.
587 if (PT->isRealFloatingType())
588 CS.setKind(ConversionSpecifier::fArg);
589 else if (PT->isSignedIntegerType())
590 CS.setKind(ConversionSpecifier::dArg);
591 else if (PT->isUnsignedIntegerType())
592 CS.setKind(ConversionSpecifier::uArg);
593 else
594 llvm_unreachable("Unexpected type");
595
596 return true;
597}
598
599void ScanfSpecifier::toString(raw_ostream &os) const {
600 os << "%";
601
602 if (usesPositionalArg())
603 os << getPositionalArgIndex() << "$";
604 if (SuppressAssignment)
605 os << "*";
606
607 FieldWidth.toString(os);
608 os << LM.toString();
609 os << CS.toString();
610}
611
612bool clang::analyze_format_string::ParseScanfString(FormatStringHandler &H,
613 const char *I,
614 const char *E,
615 const LangOptions &LO,
616 const TargetInfo &Target) {
617
618 unsigned argIndex = 0;
619
620 // Keep looking for a format specifier until we have exhausted the string.
621 while (I != E) {
622 const ScanfSpecifierResult &FSR =
623 ParseScanfSpecifier(H, Beg&: I, E, argIndex, LO, Target);
624 // Did a fail-stop error of any kind occur when parsing the specifier?
625 // If so, don't do any more processing.
626 if (FSR.shouldStop())
627 return true;
628 // Did we exhaust the string or encounter an error that
629 // we can recover from?
630 if (!FSR.hasValue())
631 continue;
632 // We have a format specifier. Pass it to the callback.
633 if (!H.HandleScanfSpecifier(FS: FSR.getValue(), startSpecifier: FSR.getStart(),
634 specifierLen: I - FSR.getStart())) {
635 return true;
636 }
637 }
638 assert(I == E && "Format string not exhausted");
639 return false;
640}
641