1//===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Scanf/printf implementation for use in *Sanitizer interceptors.
10// Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html
11// and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html
12// with a few common GNU extensions.
13//
14//===----------------------------------------------------------------------===//
15
16#include <stdarg.h>
17
18static const char *parse_number(const char *p, int *out) {
19 *out = internal_atoll(nptr: p);
20 while (*p >= '0' && *p <= '9')
21 ++p;
22 return p;
23}
24
25static const char *maybe_parse_param_index(const char *p, int *out) {
26 // n$
27 if (*p >= '0' && *p <= '9') {
28 int number;
29 const char *q = parse_number(p, out: &number);
30 CHECK(q);
31 if (*q == '$') {
32 *out = number;
33 p = q + 1;
34 }
35 }
36
37 // Otherwise, do not change p. This will be re-parsed later as the field
38 // width.
39 return p;
40}
41
// Returns true iff c is one of the characters of the NUL-terminated set s.
// The terminator of s is never treated as a member, so c == '\0' always yields
// false. A strchr-style lookup treats the terminator as part of the string and
// would report a match for '\0'; callers that advance their cursor on a match
// could then step past the end of a format string.
static bool char_is_one_of(char c, const char *s) {
  for (; *s != '\0'; ++s) {
    if (*s == c)
      return true;
  }
  return false;
}
45
// Parses an optional length modifier at p and returns a pointer just past it.
// Single-letter modifiers (j, z, t, L, q) are stored in ll[0]; "h"/"hh" and
// "l"/"ll" are stored in ll[0] and, when doubled, ll[1]. Entries that are not
// written keep whatever value the caller put there. If p does not start with a
// modifier -- in particular if it points at the terminating '\0' -- nothing is
// consumed and ll is left untouched.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
  case 'j':
  case 'z':
  case 't':
  case 'L':
  case 'q':
    ll[0] = *p++;
    break;
  case 'h':
  case 'l':
    ll[0] = *p++;
    // A repeated letter forms the two-character modifier "hh" or "ll".
    if (*p == ll[0])
      ll[1] = *p++;
    break;
  default:
    // Not a length modifier (this includes the end of the string).
    break;
  }
  return p;
}
67
68// Returns true if the character is an integer conversion specifier.
69static bool format_is_integer_conv(char c) {
70 return char_is_one_of(c, s: "diouxXn");
71}
72
73// Returns true if the character is an floating point conversion specifier.
74static bool format_is_float_conv(char c) {
75 return char_is_one_of(c, s: "aAeEfFgG");
76}
77
78// Returns string output character size for string-like conversions,
79// or 0 if the conversion is invalid.
80static int format_get_char_size(char convSpecifier,
81 const char lengthModifier[2]) {
82 if (char_is_one_of(c: convSpecifier, s: "CS")) {
83 return sizeof(wchar_t);
84 }
85
86 if (char_is_one_of(c: convSpecifier, s: "cs[")) {
87 if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0')
88 return sizeof(wchar_t);
89 else if (lengthModifier[0] == '\0')
90 return sizeof(char);
91 }
92
93 return 0;
94}
95
// Special (non-positive) results of the *_get_value_size() helpers. Any
// positive result is a size in bytes instead.
enum FormatStoreSize {
  // Size is not known up front; it is wcslen() of the destination buffer.
  FSS_WCSLEN = -2,
  // Size is not known up front; it is strlen() of the destination buffer.
  FSS_STRLEN = -1,
  // The conversion specifier is not valid.
  FSS_INVALID = 0
};
106
107// Returns the memory size of a format directive (if >0), or a value of
108// FormatStoreSize.
109static int format_get_value_size(char convSpecifier,
110 const char lengthModifier[2],
111 bool promote_float) {
112 if (format_is_integer_conv(c: convSpecifier)) {
113 switch (lengthModifier[0]) {
114 case 'h':
115 return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short);
116 case 'l':
117 return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long);
118 case 'q':
119 return sizeof(long long);
120 case 'L':
121 return sizeof(long long);
122 case 'j':
123 return sizeof(INTMAX_T);
124 case 'z':
125 return sizeof(SIZE_T);
126 case 't':
127 return sizeof(PTRDIFF_T);
128 case 0:
129 return sizeof(int);
130 default:
131 return FSS_INVALID;
132 }
133 }
134
135 if (format_is_float_conv(c: convSpecifier)) {
136 switch (lengthModifier[0]) {
137 case 'L':
138 case 'q':
139 return sizeof(long double);
140 case 'l':
141 return lengthModifier[1] == 'l' ? sizeof(long double)
142 : sizeof(double);
143 case 0:
144 // Printf promotes floats to doubles but scanf does not
145 return promote_float ? sizeof(double) : sizeof(float);
146 default:
147 return FSS_INVALID;
148 }
149 }
150
151 if (convSpecifier == 'p') {
152 if (lengthModifier[0] != 0)
153 return FSS_INVALID;
154 return sizeof(void *);
155 }
156
157 return FSS_INVALID;
158}
159
// One parsed scanf conversion directive, as filled in by scanf_parse_next().
struct ScanfDirective {
  int argIdx;              // "%n$" argument index; -1 when none was given
  int fieldWidth;          // maximum field width; 0 when none was given
  const char *begin;       // points at the directive's '%'
  const char *end;         // points one past the directive's last character
  bool suppressed;         // '*' flag: assignment is suppressed
  bool allocate;           // 'm' flag: the callee allocates the buffer
  char lengthModifier[2];  // e.g. {'l','l'}; unused entries are '\0'
  char convSpecifier;      // conversion character; 0 if no directive was found
  bool maybeGnuMalloc;     // may be the GNU "%as" / "%aS" / "%a[...]" extension
};
171
172// Parse scanf format string. If a valid directive in encountered, it is
173// returned in dir. This function returns the pointer to the first
174// unprocessed character, or 0 in case of error.
175// In case of the end-of-string, a pointer to the closing \0 is returned.
176static const char *scanf_parse_next(const char *p, bool allowGnuMalloc,
177 ScanfDirective *dir) {
178 internal_memset(s: dir, c: 0, n: sizeof(*dir));
179 dir->argIdx = -1;
180
181 while (*p) {
182 if (*p != '%') {
183 ++p;
184 continue;
185 }
186 dir->begin = p;
187 ++p;
188 // %%
189 if (*p == '%') {
190 ++p;
191 continue;
192 }
193 if (*p == '\0') {
194 return nullptr;
195 }
196 // %n$
197 p = maybe_parse_param_index(p, out: &dir->argIdx);
198 CHECK(p);
199 // *
200 if (*p == '*') {
201 dir->suppressed = true;
202 ++p;
203 }
204 // Field width
205 if (*p >= '0' && *p <= '9') {
206 p = parse_number(p, out: &dir->fieldWidth);
207 CHECK(p);
208 if (dir->fieldWidth <= 0) // Width if at all must be non-zero
209 return nullptr;
210 }
211 // m
212 if (*p == 'm') {
213 dir->allocate = true;
214 ++p;
215 }
216 // Length modifier.
217 p = maybe_parse_length_modifier(p, ll: dir->lengthModifier);
218 // Conversion specifier.
219 dir->convSpecifier = *p++;
220 // Consume %[...] expression.
221 if (dir->convSpecifier == '[') {
222 if (*p == '^')
223 ++p;
224 if (*p == ']')
225 ++p;
226 while (*p && *p != ']')
227 ++p;
228 if (*p == 0)
229 return nullptr; // unexpected end of string
230 // Consume the closing ']'.
231 ++p;
232 }
233 // This is unfortunately ambiguous between old GNU extension
234 // of %as, %aS and %a[...] and newer POSIX %a followed by
235 // letters s, S or [.
236 if (allowGnuMalloc && dir->convSpecifier == 'a' &&
237 !dir->lengthModifier[0]) {
238 if (*p == 's' || *p == 'S') {
239 dir->maybeGnuMalloc = true;
240 ++p;
241 } else if (*p == '[') {
242 // Watch for %a[h-j%d], if % appears in the
243 // [...] range, then we need to give up, we don't know
244 // if scanf will parse it as POSIX %a [h-j %d ] or
245 // GNU allocation of string with range dh-j plus %.
246 const char *q = p + 1;
247 if (*q == '^')
248 ++q;
249 if (*q == ']')
250 ++q;
251 while (*q && *q != ']' && *q != '%')
252 ++q;
253 if (*q == 0 || *q == '%')
254 return nullptr;
255 p = q + 1; // Consume the closing ']'.
256 dir->maybeGnuMalloc = true;
257 }
258 }
259 dir->end = p;
260 break;
261 }
262 return p;
263}
264
265static int scanf_get_value_size(ScanfDirective *dir) {
266 if (dir->allocate) {
267 if (!char_is_one_of(c: dir->convSpecifier, s: "cCsS["))
268 return FSS_INVALID;
269 return sizeof(char *);
270 }
271
272 if (dir->maybeGnuMalloc) {
273 if (dir->convSpecifier != 'a' || dir->lengthModifier[0])
274 return FSS_INVALID;
275 // This is ambiguous, so check the smaller size of char * (if it is
276 // a GNU extension of %as, %aS or %a[...]) and float (if it is
277 // POSIX %a followed by s, S or [ letters).
278 return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float);
279 }
280
281 if (char_is_one_of(c: dir->convSpecifier, s: "cCsS[")) {
282 bool needsTerminator = char_is_one_of(c: dir->convSpecifier, s: "sS[");
283 unsigned charSize =
284 format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier);
285 if (charSize == 0)
286 return FSS_INVALID;
287 if (dir->fieldWidth == 0) {
288 if (!needsTerminator)
289 return charSize;
290 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
291 }
292 return (dir->fieldWidth + needsTerminator) * charSize;
293 }
294
295 return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: false);
296}
297
298// Common part of *scanf interceptors.
299// Process format string and va_list, and report all store ranges.
300// Stops when "consuming" n_inputs input items.
301static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc,
302 const char *format, va_list aq) {
303 CHECK_GT(n_inputs, 0);
304 const char *p = format;
305
306 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
307
308 while (*p) {
309 ScanfDirective dir;
310 p = scanf_parse_next(p, allowGnuMalloc, dir: &dir);
311 if (!p)
312 break;
313 if (dir.convSpecifier == 0) {
314 // This can only happen at the end of the format string.
315 CHECK_EQ(*p, 0);
316 break;
317 }
318 // Here the directive is valid. Do what it says.
319 if (dir.argIdx != -1) {
320 // Unsupported.
321 break;
322 }
323 if (dir.suppressed)
324 continue;
325 int size = scanf_get_value_size(dir: &dir);
326 if (size == FSS_INVALID) {
327 Report(format: "%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n",
328 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
329 break;
330 }
331 void *argp = va_arg(aq, void *);
332 if (dir.convSpecifier != 'n')
333 --n_inputs;
334 if (n_inputs < 0)
335 break;
336 if (size == FSS_STRLEN) {
337 size = internal_strlen(s: (const char *)argp) + 1;
338 } else if (size == FSS_WCSLEN) {
339 // FIXME: actually use wcslen() to calculate it.
340 size = 0;
341 }
342 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
343 // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well.
344 if (dir.allocate) {
345 if (char *buf = *(char **)argp) {
346 if (dir.convSpecifier == 'c')
347 size = 1;
348 else if (dir.convSpecifier == 'C')
349 size = sizeof(wchar_t);
350 else if (dir.convSpecifier == 'S')
351 size = (internal_wcslen(s: (wchar_t *)buf) + 1) * sizeof(wchar_t);
352 else // 's' or '['
353 size = internal_strlen(s: buf) + 1;
354 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size);
355 }
356 }
357 }
358}
359
360#if SANITIZER_INTERCEPT_PRINTF
361
// One parsed printf conversion directive, as filled in by printf_parse_next().
struct PrintfDirective {
  int fieldWidth;          // literal field width; 0 when absent or given as '*'
  int fieldPrecision;      // literal precision; 0 when absent or given as '*'
  int argIdx;              // width argument index ("%*n$"); -1 if not specified
  int precisionIdx;        // precision argument index (".*n$"); -1 if not
                           // specified
  const char *begin;       // points at the directive's '%'
  const char *end;         // points one past the directive's last character
  bool starredWidth;       // the width was given as '*'
  bool starredPrecision;   // the precision was given as '*'
  char lengthModifier[2];  // e.g. {'l','l'}; unused entries are '\0'
  char convSpecifier;      // conversion character; 0 if no directive was found
};
374
375static const char *maybe_parse_number(const char *p, int *out) {
376 if (*p >= '0' && *p <= '9')
377 p = parse_number(p, out);
378 return p;
379}
380
381static const char *maybe_parse_number_or_star(const char *p, int *out,
382 bool *star) {
383 if (*p == '*') {
384 *star = true;
385 ++p;
386 } else {
387 *star = false;
388 p = maybe_parse_number(p, out);
389 }
390 return p;
391}
392
393// Parse printf format string. Same as scanf_parse_next.
394static const char *printf_parse_next(const char *p, PrintfDirective *dir) {
395 internal_memset(s: dir, c: 0, n: sizeof(*dir));
396 dir->argIdx = -1;
397 dir->precisionIdx = -1;
398
399 while (*p) {
400 if (*p != '%') {
401 ++p;
402 continue;
403 }
404 dir->begin = p;
405 ++p;
406 // %%
407 if (*p == '%') {
408 ++p;
409 continue;
410 }
411 if (*p == '\0') {
412 return nullptr;
413 }
414 // %n$
415 p = maybe_parse_param_index(p, out: &dir->precisionIdx);
416 CHECK(p);
417 // Flags
418 while (char_is_one_of(c: *p, s: "'-+ #0")) {
419 ++p;
420 }
421 // Field width
422 p = maybe_parse_number_or_star(p, out: &dir->fieldWidth,
423 star: &dir->starredWidth);
424 if (!p)
425 return nullptr;
426 // Precision
427 if (*p == '.') {
428 ++p;
429 // Actual precision is optional (surprise!)
430 p = maybe_parse_number_or_star(p, out: &dir->fieldPrecision,
431 star: &dir->starredPrecision);
432 if (!p)
433 return nullptr;
434 // m$
435 if (dir->starredPrecision) {
436 p = maybe_parse_param_index(p, out: &dir->precisionIdx);
437 CHECK(p);
438 }
439 }
440 // Length modifier.
441 p = maybe_parse_length_modifier(p, ll: dir->lengthModifier);
442 // Conversion specifier.
443 dir->convSpecifier = *p++;
444 dir->end = p;
445 break;
446 }
447 return p;
448}
449
450static int printf_get_value_size(PrintfDirective *dir) {
451 if (char_is_one_of(c: dir->convSpecifier, s: "cCsS")) {
452 unsigned charSize =
453 format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier);
454 if (charSize == 0)
455 return FSS_INVALID;
456 if (char_is_one_of(c: dir->convSpecifier, s: "sS")) {
457 return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN;
458 }
459 return charSize;
460 }
461
462 return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: true);
463}
464
// Pops one by-value argument of the given size from the va_list that aq points
// to. Floating-point conversions are read as double (8 bytes) or long double
// (12 or 16 bytes); everything else as u32 (1, 2 or 4 bytes) or u64 (8 bytes).
// For any other size a warning is reported and the macro returns from the
// enclosing function, so it may only be used in functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                      \
  do {                                                                \
    if (format_is_float_conv(convSpecifier)) {                        \
      switch (size) {                                                 \
      case 8:                                                         \
        va_arg(*(aq), double);                                        \
        break;                                                        \
      case 12:                                                        \
      case 16:                                                        \
        va_arg(*(aq), long double);                                   \
        break;                                                        \
      default:                                                        \
        Report("WARNING: unexpected floating-point arg size"          \
               " in printf interceptor: %zu\n",                       \
               static_cast<uptr>(size));                              \
        return;                                                       \
      }                                                               \
    } else {                                                          \
      switch (size) {                                                 \
      case 1:                                                         \
      case 2:                                                         \
      case 4:                                                         \
        va_arg(*(aq), u32);                                           \
        break;                                                        \
      case 8:                                                         \
        va_arg(*(aq), u64);                                           \
        break;                                                        \
      default:                                                        \
        Report("WARNING: unexpected arg size"                         \
               " in printf interceptor: %zu\n",                       \
               static_cast<uptr>(size));                              \
        return;                                                       \
      }                                                               \
    }                                                                 \
  } while (0)
500
501// Common part of *printf interceptors.
502// Process format string and va_list, and report all load ranges.
503static void printf_common(void *ctx, const char *format, va_list aq) {
504 COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1);
505
506 const char *p = format;
507
508 while (*p) {
509 PrintfDirective dir;
510 p = printf_parse_next(p, dir: &dir);
511 if (!p)
512 break;
513 if (dir.convSpecifier == 0) {
514 // This can only happen at the end of the format string.
515 CHECK_EQ(*p, 0);
516 break;
517 }
518 // Here the directive is valid. Do what it says.
519 if (dir.argIdx != -1 || dir.precisionIdx != -1) {
520 // Unsupported.
521 break;
522 }
523 if (dir.starredWidth) {
524 // Dynamic width
525 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
526 }
527 if (dir.starredPrecision) {
528 // Dynamic precision
529 SKIP_SCALAR_ARG(&aq, 'd', sizeof(int));
530 }
531 // %m does not require an argument: strlen(errno).
532 if (dir.convSpecifier == 'm')
533 continue;
534 int size = printf_get_value_size(dir: &dir);
535 if (size == FSS_INVALID) {
536 static int ReportedOnce;
537 if (!ReportedOnce++)
538 Report(
539 format: "%s: WARNING: unexpected format specifier in printf "
540 "interceptor: %.*s (reported once per process)\n",
541 SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin);
542 break;
543 }
544 if (dir.convSpecifier == 'n') {
545 void *argp = va_arg(aq, void *);
546 COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size);
547 continue;
548 } else if (size == FSS_STRLEN) {
549 if (void *argp = va_arg(aq, void *)) {
550 uptr len;
551 if (dir.starredPrecision) {
552 // FIXME: properly support starred precision for strings.
553 len = 0;
554 } else if (dir.fieldPrecision > 0) {
555 // Won't read more than "precision" symbols.
556 len = internal_strnlen(s: (const char *)argp, maxlen: dir.fieldPrecision);
557 if (len < (uptr)dir.fieldPrecision)
558 len++;
559 } else {
560 // Whole string will be accessed.
561 len = internal_strlen(s: (const char *)argp) + 1;
562 }
563 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len);
564 }
565 } else if (size == FSS_WCSLEN) {
566 if (void *argp = va_arg(aq, void *)) {
567 // FIXME: Properly support wide-character strings (via wcsrtombs).
568 COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0);
569 }
570 } else {
571 // Skip non-pointer args
572 SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size);
573 }
574 }
575}
576
577#endif // SANITIZER_INTERCEPT_PRINTF
578