| 1 | //===-- String to integer conversion utils ----------------------*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | |
| 9 | // ----------------------------------------------------------------------------- |
| 10 | // **** WARNING **** |
| 11 | // This file is shared with libc++. You should also be careful when adding |
| 12 | // dependencies to this file, since it needs to build for all libc++ targets. |
| 13 | // ----------------------------------------------------------------------------- |
| 14 | |
| 15 | #ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H |
| 16 | #define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H |
| 17 | |
| 18 | #include "hdr/errno_macros.h" // For ERANGE |
| 19 | #include "src/__support/CPP/limits.h" |
| 20 | #include "src/__support/CPP/type_traits.h" |
| 21 | #include "src/__support/CPP/type_traits/make_unsigned.h" |
| 22 | #include "src/__support/big_int.h" |
| 23 | #include "src/__support/common.h" |
| 24 | #include "src/__support/ctype_utils.h" |
| 25 | #include "src/__support/macros/config.h" |
| 26 | #include "src/__support/str_to_num_result.h" |
| 27 | #include "src/__support/uint128.h" |
| 28 | #include "src/__support/wctype_utils.h" |
| 29 | |
| 30 | namespace LIBC_NAMESPACE_DECL { |
| 31 | namespace internal { |
| 32 | |
| 33 | // Returns the idx to the first character in src that is not a whitespace |
| 34 | // character (as determined by isspace()) |
| 35 | template <typename CharType> |
| 36 | LIBC_INLINE size_t |
| 37 | first_non_whitespace(const CharType *__restrict src, |
| 38 | size_t src_len = cpp::numeric_limits<size_t>::max()) { |
| 39 | size_t src_cur = 0; |
| 40 | for (; src_cur < src_len && internal::isspace(src[src_cur]); ++src_cur) |
| 41 | ; |
| 42 | return src_cur; |
| 43 | } |
| 44 | |
| 45 | // Returns +1, -1, or 0 if 'src' starts with (respectively) |
| 46 | // plus sign, minus sign, or neither. |
| 47 | template <typename CharType> |
| 48 | LIBC_INLINE static int get_sign(const CharType *__restrict src) { |
| 49 | if (is_char_or_wchar(src[0], '+', L'+')) |
| 50 | return 1; |
| 51 | if (is_char_or_wchar(src[0], '-', L'-')) |
| 52 | return -1; |
| 53 | return 0; |
| 54 | } |
| 55 | |
| 56 | // checks if the next 3 characters of the string pointer are the start of a |
| 57 | // hexadecimal number. Does not advance the string pointer. |
| 58 | template <typename CharType> |
| 59 | LIBC_INLINE static bool is_hex_start(const CharType *__restrict src, |
| 60 | size_t src_len) { |
| 61 | if (src_len < 3) |
| 62 | return false; |
| 63 | return is_char_or_wchar(src[0], '0', L'0') && |
| 64 | is_char_or_wchar(tolower(src[1]), 'x', L'x') && isalnum(src[2]) && |
| 65 | b36_char_to_int(src[2]) < 16; |
| 66 | } |
| 67 | |
| 68 | // Takes the address of the string pointer and parses the base from the start of |
| 69 | // it. |
| 70 | template <typename CharType> |
| 71 | LIBC_INLINE static int infer_base(const CharType *__restrict src, |
| 72 | size_t src_len) { |
| 73 | // A hexadecimal number is defined as "the prefix 0x or 0X followed by a |
| 74 | // sequence of the decimal digits and the letters a (or A) through f (or F) |
| 75 | // with values 10 through 15 respectively." (C standard 6.4.4.1) |
| 76 | if (is_hex_start(src, src_len)) |
| 77 | return 16; |
| 78 | // An octal number is defined as "the prefix 0 optionally followed by a |
| 79 | // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any |
| 80 | // number that starts with 0, including just 0, is an octal number. |
| 81 | if (src_len > 0 && is_char_or_wchar(src[0], '0', L'0')) { |
| 82 | return 8; |
| 83 | } |
| 84 | // A decimal number is defined as beginning "with a nonzero digit and |
| 85 | // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1) |
| 86 | return 10; |
| 87 | } |
| 88 | |
| 89 | // ----------------------------------------------------------------------------- |
| 90 | // **** WARNING **** |
| 91 | // This interface is shared with libc++, if you change this interface you need |
| 92 | // to update it in both libc and libc++. |
| 93 | // ----------------------------------------------------------------------------- |
| 94 | // Takes a pointer to a string and the base to convert to. This function is used |
| 95 | // as the backend for all of the string to int functions. |
| 96 | template <typename T, typename CharType> |
| 97 | LIBC_INLINE StrToNumResult<T> |
| 98 | strtointeger(const CharType *__restrict src, int base, |
| 99 | const size_t src_len = cpp::numeric_limits<size_t>::max()) { |
| 100 | using ResultType = make_integral_or_big_int_unsigned_t<T>; |
| 101 | |
| 102 | if (src_len == 0) |
| 103 | return {0, 0, 0}; |
| 104 | |
| 105 | if (base < 0 || base == 1 || base > 36) |
| 106 | return {0, 0, EINVAL}; |
| 107 | |
| 108 | size_t src_cur = first_non_whitespace(src, src_len); |
| 109 | if (src_cur == src_len) { |
| 110 | return {0, 0, 0}; |
| 111 | } |
| 112 | |
| 113 | int sign = get_sign(src + src_cur); |
| 114 | bool is_positive = (sign >= 0); |
| 115 | src_cur += (sign != 0); |
| 116 | |
| 117 | if (base == 0) |
| 118 | base = infer_base(src + src_cur, src_len - src_cur); |
| 119 | |
| 120 | if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur)) |
| 121 | src_cur = src_cur + 2; |
| 122 | |
| 123 | constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>; |
| 124 | ResultType constexpr NEGATIVE_MAX = |
| 125 | !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1 |
| 126 | : cpp::numeric_limits<T>::max(); |
| 127 | ResultType const abs_max = |
| 128 | (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX); |
| 129 | ResultType const abs_max_div_by_base = |
| 130 | abs_max / static_cast<ResultType>(base); |
| 131 | |
| 132 | bool is_number = false; |
| 133 | int error_val = 0; |
| 134 | ResultType result = 0; |
| 135 | while (src_cur < src_len && isalnum(src[src_cur])) { |
| 136 | int cur_digit = b36_char_to_int(src[src_cur]); |
| 137 | if (cur_digit >= base) |
| 138 | break; |
| 139 | |
| 140 | is_number = true; |
| 141 | ++src_cur; |
| 142 | |
| 143 | // If the number has already hit the maximum value for the current type then |
| 144 | // the result cannot change, but we still need to advance src to the end of |
| 145 | // the number. |
| 146 | if (result == abs_max) { |
| 147 | error_val = ERANGE; |
| 148 | continue; |
| 149 | } |
| 150 | |
| 151 | if (result > abs_max_div_by_base) { |
| 152 | result = abs_max; |
| 153 | error_val = ERANGE; |
| 154 | } else { |
| 155 | result = result * static_cast<ResultType>(base); |
| 156 | } |
| 157 | if (result > abs_max - static_cast<ResultType>(cur_digit)) { |
| 158 | result = abs_max; |
| 159 | error_val = ERANGE; |
| 160 | } else { |
| 161 | result = result + static_cast<ResultType>(cur_digit); |
| 162 | } |
| 163 | } |
| 164 | |
| 165 | ptrdiff_t str_len = is_number ? static_cast<ptrdiff_t>(src_cur) : 0; |
| 166 | |
| 167 | if (error_val == ERANGE) { |
| 168 | if (is_positive || IS_UNSIGNED) |
| 169 | return {cpp::numeric_limits<T>::max(), str_len, error_val}; |
| 170 | else // T is signed and there is a negative overflow |
| 171 | return {cpp::numeric_limits<T>::min(), str_len, error_val}; |
| 172 | } |
| 173 | |
| 174 | return {static_cast<T>(is_positive ? result : -result), str_len, error_val}; |
| 175 | } |
| 176 | |
| 177 | } // namespace internal |
| 178 | } // namespace LIBC_NAMESPACE_DECL |
| 179 | |
| 180 | #endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H |
| 181 | |