1 | //===-- sanitizer_common_interceptors_format.inc ----------------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // Scanf/printf implementation for use in *Sanitizer interceptors. |
10 | // Follows http://pubs.opengroup.org/onlinepubs/9699919799/functions/fscanf.html |
11 | // and http://pubs.opengroup.org/onlinepubs/9699919799/functions/fprintf.html |
12 | // with a few common GNU extensions. |
13 | // |
14 | //===----------------------------------------------------------------------===// |
15 | |
16 | #include <stdarg.h> |
17 | |
18 | static const char *parse_number(const char *p, int *out) { |
19 | *out = internal_atoll(nptr: p); |
20 | while (*p >= '0' && *p <= '9') |
21 | ++p; |
22 | return p; |
23 | } |
24 | |
25 | static const char *maybe_parse_param_index(const char *p, int *out) { |
26 | // n$ |
27 | if (*p >= '0' && *p <= '9') { |
28 | int number; |
29 | const char *q = parse_number(p, out: &number); |
30 | CHECK(q); |
31 | if (*q == '$') { |
32 | *out = number; |
33 | p = q + 1; |
34 | } |
35 | } |
36 | |
37 | // Otherwise, do not change p. This will be re-parsed later as the field |
38 | // width. |
39 | return p; |
40 | } |
41 | |
// Returns true if c is one of the characters listed in the NUL-terminated
// set s.
// The terminator is never considered a member of the set: callers pass
// characters read straight from a format string, and reporting its trailing
// '\0' as a match (which a strchr-style lookup does) lets them advance past
// the end of that string.
static bool char_is_one_of(char c, const char *s) {
  if (c == '\0')
    return false;
  for (; *s != '\0'; ++s) {
    if (*s == c)
      return true;
  }
  return false;
}
45 | |
// Parses an optional length modifier at p: one of "j", "z", "t", "L", "q",
// "h", "hh", "l" or "ll".
// The modifier characters are written to ll[0] (and ll[1] for the doubled
// forms); slots that are not needed are left exactly as the caller
// initialized them. Returns a pointer just past the modifier.
// If p does not start with a length modifier - in particular if it points at
// the terminating '\0' of the format string - ll is untouched and p is
// returned unchanged, so the parser never steps over the terminator.
static const char *maybe_parse_length_modifier(const char *p, char ll[2]) {
  switch (*p) {
    case 'j':
    case 'z':
    case 't':
    case 'L':
    case 'q':
      ll[0] = *p++;
      break;
    case 'h':
    case 'l': {
      // "h" and "l" may be doubled ("hh", "ll").
      const char first = *p++;
      ll[0] = first;
      if (*p == first)
        ll[1] = *p++;
      break;
    }
    default:
      break;
  }
  return p;
}
67 | |
68 | // Returns true if the character is an integer conversion specifier. |
69 | static bool format_is_integer_conv(char c) { |
70 | #if SANITIZER_GLIBC |
71 | if (char_is_one_of(c, s: "bB" )) |
72 | return true; |
73 | #endif |
74 | return char_is_one_of(c, s: "diouxXn" ); |
75 | } |
76 | |
77 | // Returns true if the character is an floating point conversion specifier. |
78 | static bool format_is_float_conv(char c) { |
79 | return char_is_one_of(c, s: "aAeEfFgG" ); |
80 | } |
81 | |
82 | // Returns string output character size for string-like conversions, |
83 | // or 0 if the conversion is invalid. |
84 | static int format_get_char_size(char convSpecifier, |
85 | const char lengthModifier[2]) { |
86 | if (char_is_one_of(c: convSpecifier, s: "CS" )) { |
87 | return sizeof(wchar_t); |
88 | } |
89 | |
90 | if (char_is_one_of(c: convSpecifier, s: "cs[" )) { |
91 | if (lengthModifier[0] == 'l' && lengthModifier[1] == '\0') |
92 | return sizeof(wchar_t); |
93 | else if (lengthModifier[0] == '\0') |
94 | return sizeof(char); |
95 | } |
96 | |
97 | return 0; |
98 | } |
99 | |
// Special, non-positive results of the *_get_value_size() helpers. Positive
// results are plain byte counts.
enum FormatStoreSize {
  // The conversion specifier is invalid.
  FSS_INVALID = 0,
  // The size is not known in advance; it can be calculated as strlen() of the
  // buffer.
  FSS_STRLEN = -1,
  // The size is not known in advance; it can be calculated as wcslen() of the
  // buffer.
  FSS_WCSLEN = -2
};
110 | |
111 | // Returns the memory size of a format directive (if >0), or a value of |
112 | // FormatStoreSize. |
113 | static int format_get_value_size(char convSpecifier, |
114 | const char lengthModifier[2], |
115 | bool promote_float) { |
116 | if (format_is_integer_conv(c: convSpecifier)) { |
117 | switch (lengthModifier[0]) { |
118 | case 'h': |
119 | return lengthModifier[1] == 'h' ? sizeof(char) : sizeof(short); |
120 | case 'l': |
121 | return lengthModifier[1] == 'l' ? sizeof(long long) : sizeof(long); |
122 | case 'q': |
123 | return sizeof(long long); |
124 | case 'L': |
125 | return sizeof(long long); |
126 | case 'j': |
127 | return sizeof(INTMAX_T); |
128 | case 'z': |
129 | return sizeof(SIZE_T); |
130 | case 't': |
131 | return sizeof(PTRDIFF_T); |
132 | case 0: |
133 | return sizeof(int); |
134 | default: |
135 | return FSS_INVALID; |
136 | } |
137 | } |
138 | |
139 | if (format_is_float_conv(c: convSpecifier)) { |
140 | switch (lengthModifier[0]) { |
141 | case 'L': |
142 | case 'q': |
143 | return sizeof(long double); |
144 | case 'l': |
145 | return lengthModifier[1] == 'l' ? sizeof(long double) |
146 | : sizeof(double); |
147 | case 0: |
148 | // Printf promotes floats to doubles but scanf does not |
149 | return promote_float ? sizeof(double) : sizeof(float); |
150 | default: |
151 | return FSS_INVALID; |
152 | } |
153 | } |
154 | |
155 | if (convSpecifier == 'p') { |
156 | if (lengthModifier[0] != 0) |
157 | return FSS_INVALID; |
158 | return sizeof(void *); |
159 | } |
160 | |
161 | return FSS_INVALID; |
162 | } |
163 | |
// Description of a single conversion directive of a scanf format string, as
// filled in by scanf_parse_next().
struct ScanfDirective {
  int argIdx;              // "%n$" argument index; -1 if not specified
  int fieldWidth;          // field width; 0 if not specified
  const char *begin;       // the '%' that starts the directive
  const char *end;         // first character after the directive
  bool suppressed;         // "*": suppress assignment
  bool allocate;           // "m": allocate space
  char lengthModifier[2];  // length modifier characters; unused slots are 0
  char convSpecifier;      // conversion specifier character
  bool maybeGnuMalloc;     // may be a GNU "%as", "%aS" or "%a[...]" directive
};
175 | |
176 | // Parse scanf format string. If a valid directive in encountered, it is |
177 | // returned in dir. This function returns the pointer to the first |
178 | // unprocessed character, or 0 in case of error. |
179 | // In case of the end-of-string, a pointer to the closing \0 is returned. |
180 | static const char *scanf_parse_next(const char *p, bool allowGnuMalloc, |
181 | ScanfDirective *dir) { |
182 | internal_memset(s: dir, c: 0, n: sizeof(*dir)); |
183 | dir->argIdx = -1; |
184 | |
185 | while (*p) { |
186 | if (*p != '%') { |
187 | ++p; |
188 | continue; |
189 | } |
190 | dir->begin = p; |
191 | ++p; |
192 | // %% |
193 | if (*p == '%') { |
194 | ++p; |
195 | continue; |
196 | } |
197 | if (*p == '\0') { |
198 | return nullptr; |
199 | } |
200 | // %n$ |
201 | p = maybe_parse_param_index(p, out: &dir->argIdx); |
202 | CHECK(p); |
203 | // * |
204 | if (*p == '*') { |
205 | dir->suppressed = true; |
206 | ++p; |
207 | } |
208 | // Field width |
209 | if (*p >= '0' && *p <= '9') { |
210 | p = parse_number(p, out: &dir->fieldWidth); |
211 | CHECK(p); |
212 | if (dir->fieldWidth <= 0) // Width if at all must be non-zero |
213 | return nullptr; |
214 | } |
215 | // m |
216 | if (*p == 'm') { |
217 | dir->allocate = true; |
218 | ++p; |
219 | } |
220 | // Length modifier. |
221 | p = maybe_parse_length_modifier(p, ll: dir->lengthModifier); |
222 | // Conversion specifier. |
223 | dir->convSpecifier = *p++; |
224 | // Consume %[...] expression. |
225 | if (dir->convSpecifier == '[') { |
226 | if (*p == '^') |
227 | ++p; |
228 | if (*p == ']') |
229 | ++p; |
230 | while (*p && *p != ']') |
231 | ++p; |
232 | if (*p == 0) |
233 | return nullptr; // unexpected end of string |
234 | // Consume the closing ']'. |
235 | ++p; |
236 | } |
237 | // This is unfortunately ambiguous between old GNU extension |
238 | // of %as, %aS and %a[...] and newer POSIX %a followed by |
239 | // letters s, S or [. |
240 | if (allowGnuMalloc && dir->convSpecifier == 'a' && |
241 | !dir->lengthModifier[0]) { |
242 | if (*p == 's' || *p == 'S') { |
243 | dir->maybeGnuMalloc = true; |
244 | ++p; |
245 | } else if (*p == '[') { |
246 | // Watch for %a[h-j%d], if % appears in the |
247 | // [...] range, then we need to give up, we don't know |
248 | // if scanf will parse it as POSIX %a [h-j %d ] or |
249 | // GNU allocation of string with range dh-j plus %. |
250 | const char *q = p + 1; |
251 | if (*q == '^') |
252 | ++q; |
253 | if (*q == ']') |
254 | ++q; |
255 | while (*q && *q != ']' && *q != '%') |
256 | ++q; |
257 | if (*q == 0 || *q == '%') |
258 | return nullptr; |
259 | p = q + 1; // Consume the closing ']'. |
260 | dir->maybeGnuMalloc = true; |
261 | } |
262 | } |
263 | dir->end = p; |
264 | break; |
265 | } |
266 | return p; |
267 | } |
268 | |
269 | static int scanf_get_value_size(ScanfDirective *dir) { |
270 | if (dir->allocate) { |
271 | if (!char_is_one_of(c: dir->convSpecifier, s: "cCsS[" )) |
272 | return FSS_INVALID; |
273 | return sizeof(char *); |
274 | } |
275 | |
276 | if (dir->maybeGnuMalloc) { |
277 | if (dir->convSpecifier != 'a' || dir->lengthModifier[0]) |
278 | return FSS_INVALID; |
279 | // This is ambiguous, so check the smaller size of char * (if it is |
280 | // a GNU extension of %as, %aS or %a[...]) and float (if it is |
281 | // POSIX %a followed by s, S or [ letters). |
282 | return sizeof(char *) < sizeof(float) ? sizeof(char *) : sizeof(float); |
283 | } |
284 | |
285 | if (char_is_one_of(c: dir->convSpecifier, s: "cCsS[" )) { |
286 | bool needsTerminator = char_is_one_of(c: dir->convSpecifier, s: "sS[" ); |
287 | unsigned charSize = |
288 | format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier); |
289 | if (charSize == 0) |
290 | return FSS_INVALID; |
291 | if (dir->fieldWidth == 0) { |
292 | if (!needsTerminator) |
293 | return charSize; |
294 | return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; |
295 | } |
296 | return (dir->fieldWidth + needsTerminator) * charSize; |
297 | } |
298 | |
299 | return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: false); |
300 | } |
301 | |
302 | // Common part of *scanf interceptors. |
303 | // Process format string and va_list, and report all store ranges. |
304 | // Stops when "consuming" n_inputs input items. |
305 | static void scanf_common(void *ctx, int n_inputs, bool allowGnuMalloc, |
306 | const char *format, va_list aq) { |
307 | CHECK_GT(n_inputs, 0); |
308 | const char *p = format; |
309 | |
310 | COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); |
311 | |
312 | while (*p) { |
313 | ScanfDirective dir; |
314 | p = scanf_parse_next(p, allowGnuMalloc, dir: &dir); |
315 | if (!p) |
316 | break; |
317 | if (dir.convSpecifier == 0) { |
318 | // This can only happen at the end of the format string. |
319 | CHECK_EQ(*p, 0); |
320 | break; |
321 | } |
322 | // Here the directive is valid. Do what it says. |
323 | if (dir.argIdx != -1) { |
324 | // Unsupported. |
325 | break; |
326 | } |
327 | if (dir.suppressed) |
328 | continue; |
329 | int size = scanf_get_value_size(dir: &dir); |
330 | if (size == FSS_INVALID) { |
331 | Report(format: "%s: WARNING: unexpected format specifier in scanf interceptor: %.*s\n" , |
332 | SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); |
333 | break; |
334 | } |
335 | void *argp = va_arg(aq, void *); |
336 | if (dir.convSpecifier != 'n') |
337 | --n_inputs; |
338 | if (n_inputs < 0) |
339 | break; |
340 | if (size == FSS_STRLEN) { |
341 | size = internal_strlen(s: (const char *)argp) + 1; |
342 | } else if (size == FSS_WCSLEN) { |
343 | // FIXME: actually use wcslen() to calculate it. |
344 | size = 0; |
345 | } |
346 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); |
347 | // For %mc/%mC/%ms/%m[/%mS, write the allocated output buffer as well. |
348 | if (dir.allocate) { |
349 | if (char *buf = *(char **)argp) { |
350 | if (dir.convSpecifier == 'c') |
351 | size = 1; |
352 | else if (dir.convSpecifier == 'C') |
353 | size = sizeof(wchar_t); |
354 | else if (dir.convSpecifier == 'S') |
355 | size = (internal_wcslen(s: (wchar_t *)buf) + 1) * sizeof(wchar_t); |
356 | else // 's' or '[' |
357 | size = internal_strlen(s: buf) + 1; |
358 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, buf, size); |
359 | } |
360 | } |
361 | } |
362 | } |
363 | |
364 | #if SANITIZER_INTERCEPT_PRINTF |
365 | |
// Description of a single conversion directive of a printf format string, as
// filled in by printf_parse_next().
struct PrintfDirective {
  int fieldWidth;         // literal field width; 0 if absent or given as "*"
  int fieldPrecision;     // literal precision; 0 if absent or given as "*"
  int argIdx;             // width argument index, or -1 if not specified
                          // ("%*n$")
  int precisionIdx;       // precision argument index, or -1 if not specified
                          // (".*n$")
  const char *begin;      // the '%' that starts the directive
  const char *end;        // first character after the directive
  bool starredWidth;      // the field width was given as "*"
  bool starredPrecision;  // the precision was given as "*"
  char lengthModifier[2];  // length modifier characters; unused slots are 0
  char convSpecifier;      // conversion specifier character
};
378 | |
379 | static const char *maybe_parse_number(const char *p, int *out) { |
380 | if (*p >= '0' && *p <= '9') |
381 | p = parse_number(p, out); |
382 | return p; |
383 | } |
384 | |
385 | static const char *maybe_parse_number_or_star(const char *p, int *out, |
386 | bool *star) { |
387 | if (*p == '*') { |
388 | *star = true; |
389 | ++p; |
390 | } else { |
391 | *star = false; |
392 | p = maybe_parse_number(p, out); |
393 | } |
394 | return p; |
395 | } |
396 | |
397 | // Parse printf format string. Same as scanf_parse_next. |
398 | static const char *printf_parse_next(const char *p, PrintfDirective *dir) { |
399 | internal_memset(s: dir, c: 0, n: sizeof(*dir)); |
400 | dir->argIdx = -1; |
401 | dir->precisionIdx = -1; |
402 | |
403 | while (*p) { |
404 | if (*p != '%') { |
405 | ++p; |
406 | continue; |
407 | } |
408 | dir->begin = p; |
409 | ++p; |
410 | // %% |
411 | if (*p == '%') { |
412 | ++p; |
413 | continue; |
414 | } |
415 | if (*p == '\0') { |
416 | return nullptr; |
417 | } |
418 | // %n$ |
419 | p = maybe_parse_param_index(p, out: &dir->precisionIdx); |
420 | CHECK(p); |
421 | // Flags |
422 | while (char_is_one_of(c: *p, s: "'-+ #0" )) { |
423 | ++p; |
424 | } |
425 | // Field width |
426 | p = maybe_parse_number_or_star(p, out: &dir->fieldWidth, |
427 | star: &dir->starredWidth); |
428 | if (!p) |
429 | return nullptr; |
430 | // Precision |
431 | if (*p == '.') { |
432 | ++p; |
433 | // Actual precision is optional (surprise!) |
434 | p = maybe_parse_number_or_star(p, out: &dir->fieldPrecision, |
435 | star: &dir->starredPrecision); |
436 | if (!p) |
437 | return nullptr; |
438 | // m$ |
439 | if (dir->starredPrecision) { |
440 | p = maybe_parse_param_index(p, out: &dir->precisionIdx); |
441 | CHECK(p); |
442 | } |
443 | } |
444 | // Length modifier. |
445 | p = maybe_parse_length_modifier(p, ll: dir->lengthModifier); |
446 | // Conversion specifier. |
447 | dir->convSpecifier = *p++; |
448 | dir->end = p; |
449 | break; |
450 | } |
451 | return p; |
452 | } |
453 | |
454 | static int printf_get_value_size(PrintfDirective *dir) { |
455 | if (char_is_one_of(c: dir->convSpecifier, s: "cCsS" )) { |
456 | unsigned charSize = |
457 | format_get_char_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier); |
458 | if (charSize == 0) |
459 | return FSS_INVALID; |
460 | if (char_is_one_of(c: dir->convSpecifier, s: "sS" )) { |
461 | return (charSize == sizeof(char)) ? FSS_STRLEN : FSS_WCSLEN; |
462 | } |
463 | return charSize; |
464 | } |
465 | |
466 | return format_get_value_size(convSpecifier: dir->convSpecifier, lengthModifier: dir->lengthModifier, promote_float: true); |
467 | } |
468 | |
// Consumes one by-value argument from the va_list that `aq` points to and
// discards it. The type passed to va_arg is chosen from `size` and from
// whether `convSpecifier` is a floating-point conversion.
// If `size` is not one of the handled values, a warning is reported and the
// macro executes `return;` in the *enclosing* function, so it can only be
// used inside functions returning void.
#define SKIP_SCALAR_ARG(aq, convSpecifier, size)                            \
  do {                                                                      \
    if (format_is_float_conv(convSpecifier)) {                              \
      switch (size) {                                                       \
        case 8:                                                             \
          va_arg(*(aq), double);                                            \
          break;                                                            \
        case 12:                                                            \
        case 16:                                                            \
          va_arg(*(aq), long double);                                       \
          break;                                                            \
        default:                                                            \
          Report("WARNING: unexpected floating-point arg size"              \
                 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
          return;                                                           \
      }                                                                     \
    } else {                                                                \
      switch (size) {                                                       \
        case 1:                                                             \
        case 2:                                                             \
        case 4:                                                             \
          va_arg(*(aq), u32);                                               \
          break;                                                            \
        case 8:                                                             \
          va_arg(*(aq), u64);                                               \
          break;                                                            \
        default:                                                            \
          Report("WARNING: unexpected arg size"                             \
                 " in printf interceptor: %zu\n", static_cast<uptr>(size)); \
          return;                                                           \
      }                                                                     \
    }                                                                       \
  } while (0)
504 | |
505 | // Common part of *printf interceptors. |
506 | // Process format string and va_list, and report all load ranges. |
507 | static void printf_common(void *ctx, const char *format, va_list aq) { |
508 | COMMON_INTERCEPTOR_READ_RANGE(ctx, format, internal_strlen(format) + 1); |
509 | |
510 | const char *p = format; |
511 | |
512 | while (*p) { |
513 | PrintfDirective dir; |
514 | p = printf_parse_next(p, dir: &dir); |
515 | if (!p) |
516 | break; |
517 | if (dir.convSpecifier == 0) { |
518 | // This can only happen at the end of the format string. |
519 | CHECK_EQ(*p, 0); |
520 | break; |
521 | } |
522 | // Here the directive is valid. Do what it says. |
523 | if (dir.argIdx != -1 || dir.precisionIdx != -1) { |
524 | // Unsupported. |
525 | break; |
526 | } |
527 | if (dir.starredWidth) { |
528 | // Dynamic width |
529 | SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); |
530 | } |
531 | if (dir.starredPrecision) { |
532 | // Dynamic precision |
533 | SKIP_SCALAR_ARG(&aq, 'd', sizeof(int)); |
534 | } |
535 | // %m does not require an argument: strlen(errno). |
536 | if (dir.convSpecifier == 'm') |
537 | continue; |
538 | int size = printf_get_value_size(dir: &dir); |
539 | if (size == FSS_INVALID) { |
540 | static int ReportedOnce; |
541 | if (!ReportedOnce++) |
542 | Report( |
543 | format: "%s: WARNING: unexpected format specifier in printf " |
544 | "interceptor: %.*s (reported once per process)\n" , |
545 | SanitizerToolName, static_cast<int>(dir.end - dir.begin), dir.begin); |
546 | break; |
547 | } |
548 | if (dir.convSpecifier == 'n') { |
549 | void *argp = va_arg(aq, void *); |
550 | COMMON_INTERCEPTOR_WRITE_RANGE(ctx, argp, size); |
551 | continue; |
552 | } else if (size == FSS_STRLEN) { |
553 | if (void *argp = va_arg(aq, void *)) { |
554 | uptr len; |
555 | if (dir.starredPrecision) { |
556 | // FIXME: properly support starred precision for strings. |
557 | len = 0; |
558 | } else if (dir.fieldPrecision > 0) { |
559 | // Won't read more than "precision" symbols. |
560 | len = internal_strnlen(s: (const char *)argp, maxlen: dir.fieldPrecision); |
561 | if (len < (uptr)dir.fieldPrecision) |
562 | len++; |
563 | } else { |
564 | // Whole string will be accessed. |
565 | len = internal_strlen(s: (const char *)argp) + 1; |
566 | } |
567 | COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, len); |
568 | } |
569 | } else if (size == FSS_WCSLEN) { |
570 | if (void *argp = va_arg(aq, void *)) { |
571 | // FIXME: Properly support wide-character strings (via wcsrtombs). |
572 | COMMON_INTERCEPTOR_READ_RANGE(ctx, argp, 0); |
573 | } |
574 | } else { |
575 | // Skip non-pointer args |
576 | SKIP_SCALAR_ARG(&aq, dir.convSpecifier, size); |
577 | } |
578 | } |
579 | } |
580 | |
581 | #endif // SANITIZER_INTERCEPT_PRINTF |
582 | |