1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Scanf/printf implementation for use in *Sanitizer interceptors.
10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
12// with a few common GNU extensions.
13//
14//===----------------------------------------------------------------------===//
15
16#include <stdarg.h>
17
18static const char *parse_number(const char *p, int *out) {
19 *out = internal_atoll(nptr: p);
20 while (*p >= '0' && *p <= '9')
21 ++p;
22 return p;
23}
24
25static const char *maybe_parse_param_index(const char *p, int *out) {
26 // n$
27 if (*p >= '0' && *p <= '9') {
28 int number;
29 const char *q = parse_number(p, out: &number);
30 CHECK(q);
31 if (*q == '$') {
32 *out = number;
33 p = q + 1;
34 }
35 }
36
37 // Otherwise, do not change p. This will be re-parsed later as the field
38 // width.
39 return p;
40}
41
42static bool char_is_one_of(char c, const char *s) {
43 return !!internal_strchr(s, c);
44}
45
// Parses an optional length modifier at |p|.
//
// Recognized modifiers are the single letters j, z, t, L, q and h, l, plus the
// doubled forms hh and ll. The first letter is stored in ll[0] and, for the
// doubled forms, the second letter in ll[1]; entries that are not part of the
// modifier are left untouched (callers zero |ll| beforehand).
//
// Returns the pointer just past the modifier, or |p| unchanged when there is
// no modifier. In particular the end of the string is never consumed: the
// letters are matched explicitly instead of through a set lookup, which could
// report the terminating NUL as a member of the set.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
    case 'j':
    case 'z':
    case 't':
    case 'L':
    case 'q':
      ll[0] = *p++;
      break;
    case 'h':
    case 'l': {
      // 'h' and 'l' may be doubled ("hh", "ll").
      const char letter = *p++;
      ll[0] = letter;
      if (*p == letter) {
        ll[1] = letter;
        ++p;
      }
      break;
    }
    default:
      break;
  }
  return p;
}
67
68// Returns true if the character is an integer conversion specifier.
69static bool format_is_integer_conv(char c) {
70#if SANITIZER_GLIBC
71 if (char_is_one_of(c, s: "bB"))
72 return true;
73#endif
74 return char_is_one_of(c, s: "diouxXn");
75}
76
77// Returns true if the character is an floating point conversion specifier.
78static bool format_is_float_conv(char c) {
79 return char_is_one_of(c, s: "aAeEfFgG");
80}
81
82// Returns string output character size for string-like conversions,
83// or 0 if the conversion is invalid.
84static int format_get_char_size(char convSpecifier,
85 const char lengthModifier[2]) {
86 if (char_is_one_of(c: convSpecifier, s: "CS")) {
87 return sizeof(wchar_t);
88 }
89
90 if (char_is_one_of(c: convSpecifier, s: "cs[")) {
91 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
92 return sizeof(wchar_t);
93 else if (lengthModifier[0] == '\0')
94 return sizeof(char);
95 }
96
97 return 0;
98}
99
// Special (non-positive) results of the *_get_value_size() helpers. Positive
// results are plain byte counts.
enum FormatStoreSize {
  // Invalid conversion specifier.
  FSS_INVALID = 0,
  // Store size not known in advance; can be calculated as strlen() of the
  // destination buffer.
  FSS_STRLEN = -1,
  // Store size not known in advance; can be calculated as wcslen() of the
  // destination buffer.
  FSS_WCSLEN = -2
};
110
111// Returns the memory size of a format directive (if >0), or a value of
112// FormatStoreSize.
113static int format_get_value_size(char convSpecifier,
114 const char lengthModifier[2],
115 bool promote_float) {
116 if (format_is_integer_conv(c: convSpecifier)) {
117 switch (lengthModifier[0]) {
118 case 'h':
119 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
120 case 'l':
121 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
122 case 'q':
123 return sizeof(long long);
124 case 'L':
125 return sizeof(long long);
126 case 'j':
127 return sizeof(INTMAX_T);
128 case 'z':
129 return sizeof(SIZE_T);
130 case 't':
131 return sizeof(PTRDIFF_T);
132 case 0:
133 return sizeof(int);
134 default:
135 return FSS_INVALID;
136 }
137 }
138
139 if (format_is_float_conv(c: convSpecifier)) {
140 switch (lengthModifier[0]) {
141 case 'L':
142 case 'q':
143 return sizeof(long double);
144 case 'l':
145 return lengthModifier[1] == 'l' ? sizeof(long double)
146 : sizeof(double);
147 case 0:
148 // Printf promotes floats to doubles but scanf does not
149 return promote_float ? sizeof(double) : sizeof(float);
150 default:
151 return FSS_INVALID;
152 }
153 }
154
155 if (convSpecifier == 'p') {
156 if (lengthModifier[0] != 0)
157 return FSS_INVALID;
158 return sizeof(void *);
159 }
160
161 return FSS_INVALID;
162}
163
// A single scanf conversion directive, as filled in by scanf_parse_next().
struct ScanfDirective {
  // Argument index ("%n$"), or -1 if not specified.
  int argIdx;
  // Maximum field width; stays 0 when the directive does not give one.
  int fieldWidth;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the last character of the directive.
  const char *end;
  // Suppress assignment ("*").
  bool suppressed;
  // Allocate space ("m").
  bool allocate;
  // Length modifier letters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was found.
  char convSpecifier;
  // Set when the directive may be the old GNU "%as" / "%aS" / "%a[...]"
  // allocation extension rather than POSIX "%a" followed by literal text.
  bool maybeGnuMalloc;
};
175
176// Parse scanf format string. If a valid directive in encountered, it is
177// returned in dir. This function returns the pointer to the first
178// unprocessed character, or 0 in case of error.
179// In case of the end-of-string, a pointer to the closing \0 is returned.
180static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
181 ScanfDirective *dir) {
182 internal_memset(s: dir, c: 0, n: sizeof(*dir));
183 dir->argIdx = -1;
184
185 while (*p) {
186 if (*p != '%') {
187 ++p;
188 continue;
189 }
190 dir->begin = p;
191 ++p;
192 // %%
193 if (*p == '%') {
194 ++p;
195 continue;
196 }
197 if (*p == '\0') {
198 return nullptr;
199 }
200 // %n$
201 p = maybe_parse_param_index(p, out: &dir->argIdx);
202 CHECK(p);
203 // *
204 if (*p == '*') {
205 dir->suppressed = true;
206 ++p;
207 }
208 // Field width
209 if (*p >= '0' && *p <= '9') {
210 p = parse_number(p, out: &dir->fieldWidth);
211 CHECK(p);
212 if (dir->fieldWidth <= 0) // Width if at all must be non-zero
213 return nullptr;
214 }
215 // m
216 if (*p == 'm') {
217 dir->allocate = true;
218 ++p;
219 }
220 // Length modifier.
221 p = maybe_parse_length_modifier(p, ll: dir->lengthModifier);
222 // Conversion specifier.
223 dir->convSpecifier = *p++;
224 // Consume %[...] expression.
225 if (dir->convSpecifier == '[') {
226 if (*p == '^')
227 ++p;
228 if (*p == ']')
229 ++p;
230 while (*p && *p != ']')
231 ++p;
232 if (*p == 0)
233 return nullptr; // unexpected end of string
234 // Consume the closing ']'.
235 ++p;
236 }
237 // This is unfortunately ambiguous between old GNU extension
238 // of %as, %aS and %a[...] and newer POSIX %a followed by
239 // letters s, S or [.
240 if (allowGnuMalloc && dir->convSpecifier == 'a' &&
241 !dir->lengthModifier[0]) {
242 if (*p == 's' || *p == 'S') {
243 dir->maybeGnuMalloc = true;
244 ++p;
245 } else if (*p == '[') {
246 // Watch for %a[h-j%d], if % appears in the
247 // [...] range, then we need to give up, we don't know
248 // if scanf will parse it as POSIX %a [h-j %d ] or
249 // GNU allocation of string with range dh-j plus %.
250 const char *q = p + 1;
251 if (*q == '^')
252 ++q;
253 if (*q == ']')
254 ++q;
255 while (*q && *q != ']' && *q != '%')
256 ++q;
257 if (*q == 0 || *q == '%')
258 return nullptr;
259 p = q + 1; // Consume the closing ']'.
260 dir->maybeGnuMalloc = true;
261 }
262 }
263 dir->end = p;
264 break;
265 }
266 return p;
267}
268
269static int scanf_get_value_size(ScanfDirective *dir) {
270 if (dir->allocate) {
271 if (!char_is_one_of(c: dir->convSpecifier, s: "cCsS["))
272 return FSS_INVALID;
273 return sizeof(char *);
274 }
275
276 if (dir->maybeGnuMalloc) {
277 if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
278 return FSS_INVALID;
279 // This is ambiguous, so check the smaller size of char * (if it is
280 // a GNU extension of %as, %aS or %a[...]) and float (if it is
281 // POSIX %a followed by s, S or [ letters).
282 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
283 }
284
285 if (char_is_one_of(c: dir->convSpecifier, s: "cCsS[")) {
286 bool needsTerminator = char_is_one_of(c: dir->convSpecifier, s: "sS[");
287 unsigned charSize =
288 format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier);
289 if (charSize == 0)
290 return FSS_INVALID;
291 if (dir->fieldWidth == 0) {
292 if (!needsTerminator)
293 return charSize;
294 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
295 }
296 return (dir->fieldWidth + needsTerminator) * charSize;
297 }
298
299 return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: false);
300}
301
302// Common part of *scanf interceptors.
303// Process format string and va_list, and report all store ranges.
304// Stops when "consuming" n_inputs input items.
305static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
306 const char *format, va_list aq) {
307 CHECK_GT(n_inputs, 0);
308 const char *p = format;
309
310 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
311
312 while (*p) {
313 ScanfDirective dir;
314 p = scanf_parse_next(p, allowGnuMalloc, dir: &dir);
315 if (!p)
316 break;
317 if (dir.convSpecifier == 0) {
318 // This can only happen at the end of the format string.
319 CHECK_EQ(*p, 0);
320 break;
321 }
322 // Here the directive is valid. Do what it says.
323 if (dir.argIdx != -1) {
324 // Unsupported.
325 break;
326 }
327 if (dir.suppressed)
328 continue;
329 int size = scanf_get_value_size(dir: &dir);
330 if (size == FSS_INVALID) {
331 Report(format: "%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
332 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
333 break;
334 }
335 void *argp = va_arg(aq, void *);
336 if (dir.convSpecifier != 'n')
337 --n_inputs;
338 if (n_inputs < 0)
339 break;
340 if (size == FSS_STRLEN) {
341 size = internal_strlen(s: (const char *)argp) + 1;
342 } else if (size == FSS_WCSLEN) {
343 // FIXME: actually use wcslen() to calculate it.
344 size = 0;
345 }
346 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
347 // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well.
348 if (dir.allocate) {
349 if (char *buf = *(char **)argp) {
350 if (dir.convSpecifier == 'c')
351 size = 1;
352 else if (dir.convSpecifier == 'C')
353 size = sizeof(wchar_t);
354 else if (dir.convSpecifier == 'S')
355 size = (internal_wcslen(s: (wchar_t *)buf) + 1) * sizeof(wchar_t);
356 else // 's' or '['
357 size = internal_strlen(s: buf) + 1;
358 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
359 }
360 }
361 }
362}
363
364#if SANITIZER_INTERCEPT_PRINTF
365
// A single printf conversion directive, as filled in by printf_parse_next().
struct PrintfDirective {
  // Literal field width; stays 0 when absent or given as '*'.
  int fieldWidth;
  // Literal precision; stays 0 when absent or given as '*'.
  int fieldPrecision;
  // Width argument index, or -1 if not specified ("%*n$").
  int argIdx;
  // Precision argument index, or -1 if not specified (".*n$").
  int precisionIdx;
  // Points at the '%' that starts the directive.
  const char *begin;
  // Points just past the conversion specifier.
  const char *end;
  // The field width is taken from the argument list ('*').
  bool starredWidth;
  // The precision is taken from the argument list (".*").
  bool starredPrecision;
  // Length modifier letters; unused entries are 0.
  char lengthModifier[2];
  // Conversion specifier character; 0 if no directive was found.
  char convSpecifier;
};
378
379static const char *maybe_parse_number(const char *p, int *out) {
380 if (*p >= '0' && *p <= '9')
381 p = parse_number(p, out);
382 return p;
383}
384
385static const char *maybe_parse_number_or_star(const char *p, int *out,
386 bool *star) {
387 if (*p == '*') {
388 *star = true;
389 ++p;
390 } else {
391 *star = false;
392 p = maybe_parse_number(p, out);
393 }
394 return p;
395}
396
397// Parse printf format string. Same as scanf_parse_next.
398static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
399 internal_memset(s: dir, c: 0, n: sizeof(*dir));
400 dir->argIdx = -1;
401 dir->precisionIdx = -1;
402
403 while (*p) {
404 if (*p != '%') {
405 ++p;
406 continue;
407 }
408 dir->begin = p;
409 ++p;
410 // %%
411 if (*p == '%') {
412 ++p;
413 continue;
414 }
415 if (*p == '\0') {
416 return nullptr;
417 }
418 // %n$
419 p = maybe_parse_param_index(p, out: &dir->precisionIdx);
420 CHECK(p);
421 // Flags
422 while (char_is_one_of(c: *p, s: "'-+ #0")) {
423 ++p;
424 }
425 // Field width
426 p = maybe_parse_number_or_star(p, out: &dir->fieldWidth,
427 star: &dir->starredWidth);
428 if (!p)
429 return nullptr;
430 // Precision
431 if (*p == '.') {
432 ++p;
433 // Actual precision is optional (surprise!)
434 p = maybe_parse_number_or_star(p, out: &dir->fieldPrecision,
435 star: &dir->starredPrecision);
436 if (!p)
437 return nullptr;
438 // m$
439 if (dir->starredPrecision) {
440 p = maybe_parse_param_index(p, out: &dir->precisionIdx);
441 CHECK(p);
442 }
443 }
444 // Length modifier.
445 p = maybe_parse_length_modifier(p, ll: dir->lengthModifier);
446 // Conversion specifier.
447 dir->convSpecifier = *p++;
448 dir->end = p;
449 break;
450 }
451 return p;
452}
453
454static int printf_get_value_size(PrintfDirective *dir) {
455 if (char_is_one_of(c: dir->convSpecifier, s: "cCsS")) {
456 unsigned charSize =
457 format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier);
458 if (charSize == 0)
459 return FSS_INVALID;
460 if (char_is_one_of(c: dir->convSpecifier, s: "sS")) {
461 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
462 }
463 return charSize;
464 }
465
466 return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: true);
467}
468
// Consumes one by-value argument of |size| bytes from the va_list pointed to
// by |aq|. Integer arguments of 1, 2 or 4 bytes are fetched as u32 and 8-byte
// ones as u64; floating point arguments of 8 bytes are fetched as double and
// 12- or 16-byte ones as long double. For any other size a warning is
// reported and the macro returns from the enclosing function, so it can only
// be used inside functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                   \
  do {                                                             \
    if (!format_is_float_conv(convSpecifier)) {                    \
      switch (size) {                                              \
        case 1:                                                    \
        case 2:                                                    \
        case 4:                                                    \
          va_arg(*aq, u32);                                        \
          break;                                                   \
        case 8:                                                    \
          va_arg(*aq, u64);                                        \
          break;                                                   \
        default:                                                   \
          Report("WARNING: unexpected arg size"                    \
                 " in printf interceptor: %zu\n",                  \
                 static_cast<uptr>(size));                         \
          return;                                                  \
      }                                                            \
    } else {                                                       \
      switch (size) {                                              \
        case 8:                                                    \
          va_arg(*aq, double);                                     \
          break;                                                   \
        case 12:                                                   \
        case 16:                                                   \
          va_arg(*aq, long double);                                \
          break;                                                   \
        default:                                                   \
          Report("WARNING: unexpected floating-point arg size"     \
                 " in printf interceptor: %zu\n",                  \
                 static_cast<uptr>(size));                         \
          return;                                                  \
      }                                                            \
    }                                                              \
  } while (0)
504
505// Common part of *printf interceptors.
506// Process format string and va_list, and report all load ranges.
507static void printf_common(void *ctx, const char *format, va_list aq) {
508 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
509
510 const char *p = format;
511
512 while (*p) {
513 PrintfDirective dir;
514 p = printf_parse_next(p, dir: &dir);
515 if (!p)
516 break;
517 if (dir.convSpecifier == 0) {
518 // This can only happen at the end of the format string.
519 CHECK_EQ(*p, 0);
520 break;
521 }
522 // Here the directive is valid. Do what it says.
523 if (dir.argIdx != -1 || dir.precisionIdx != -1) {
524 // Unsupported.
525 break;
526 }
527 if (dir.starredWidth) {
528 // Dynamic width
529 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
530 }
531 if (dir.starredPrecision) {
532 // Dynamic precision
533 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
534 }
535 // %m does not require an argument: strlen(errno).
536 if (dir.convSpecifier == 'm')
537 continue;
538 int size = printf_get_value_size(dir: &dir);
539 if (size == FSS_INVALID) {
540 static int ReportedOnce;
541 if (!ReportedOnce++)
542 Report(
543 format: "%s: WARNING: unexpected format specifier in printf "
544 "interceptor: %.*s (reported once per process)\n",
545 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
546 break;
547 }
548 if (dir.convSpecifier == 'n') {
549 void *argp = va_arg(aq, void *);
550 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
551 continue;
552 } else if (size == FSS_STRLEN) {
553 if (void *argp = va_arg(aq, void *)) {
554 uptr len;
555 if (dir.starredPrecision) {
556 // FIXME: properly support starred precision for strings.
557 len = 0;
558 } else if (dir.fieldPrecision > 0) {
559 // Won't read more than "precision" symbols.
560 len = internal_strnlen(s: (const char *)argp, maxlen: dir.fieldPrecision);
561 if (len < (uptr)dir.fieldPrecision)
562 len++;
563 } else {
564 // Whole string will be accessed.
565 len = internal_strlen(s: (const char *)argp) + 1;
566 }
567 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len);
568 }
569 } else if (size == FSS_WCSLEN) {
570 if (void *argp = va_arg(aq, void *)) {
571 // FIXME: Properly support wide-character strings (via wcsrtombs).
572 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0);
573 }
574 } else {
575 // Skip non-pointer args
576 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
577 }
578 }
579}
580
581#endif // SANITIZER_INTERCEPT_PRINTF
582