1//===-- String to integer conversion utils ----------------------*- C++ -*-===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8
9// -----------------------------------------------------------------------------
10// **** WARNING ****
11// This file is shared with libc++. You should also be careful when adding
12// dependencies to this file, since it needs to build for all libc++ targets.
13// -----------------------------------------------------------------------------
14
15#ifndef LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
16#define LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
17
18#include "hdr/errno_macros.h" // For ERANGE
19#include "src/__support/CPP/limits.h"
20#include "src/__support/CPP/type_traits.h"
21#include "src/__support/CPP/type_traits/make_unsigned.h"
22#include "src/__support/big_int.h"
23#include "src/__support/common.h"
24#include "src/__support/ctype_utils.h"
25#include "src/__support/macros/config.h"
26#include "src/__support/str_to_num_result.h"
27#include "src/__support/uint128.h"
28#include "src/__support/wctype_utils.h"
29
30namespace LIBC_NAMESPACE_DECL {
31namespace internal {
32
33// Returns the idx to the first character in src that is not a whitespace
34// character (as determined by isspace())
35template <typename CharType>
36LIBC_INLINE constexpr size_t
37first_non_whitespace(const CharType *__restrict src,
38 size_t src_len = cpp::numeric_limits<size_t>::max()) {
39 size_t src_cur = 0;
40 for (; src_cur < src_len && internal::isspace(src[src_cur]); ++src_cur)
41 ;
42 return src_cur;
43}
44
45// Returns +1, -1, or 0 if 'src' starts with (respectively)
46// plus sign, minus sign, or neither.
47template <typename CharType>
48LIBC_INLINE static int get_sign(const CharType *__restrict src) {
49 if (is_char_or_wchar(src[0], '+', L'+'))
50 return 1;
51 if (is_char_or_wchar(src[0], '-', L'-'))
52 return -1;
53 return 0;
54}
55
56// checks if the next 3 characters of the string pointer are the start of a
57// hexadecimal number. Does not advance the string pointer.
58template <typename CharType>
59LIBC_INLINE static bool is_hex_start(const CharType *__restrict src,
60 size_t src_len) {
61 if (src_len < 3)
62 return false;
63 return is_char_or_wchar(src[0], '0', L'0') &&
64 is_char_or_wchar(tolower(src[1]), 'x', L'x') && isalnum(src[2]) &&
65 b36_char_to_int(src[2]) < 16;
66}
67
68// checks if the next 3 characters of the string pointer are the start of a
69// binary number. Does not advance the string pointer.
70template <typename CharType>
71LIBC_INLINE static bool is_binary_start(const CharType *__restrict src,
72 size_t src_len) {
73 if (src_len < 3)
74 return false;
75 return is_char_or_wchar(src[0], '0', L'0') &&
76 is_char_or_wchar(tolower(src[1]), 'b', L'b') && isalnum(src[2]) &&
77 b36_char_to_int(src[2]) < 2;
78}
79
80// Takes the address of the string pointer and parses the base from the start of
81// it.
82template <typename CharType>
83LIBC_INLINE static int infer_base(const CharType *__restrict src,
84 size_t src_len) {
85 // A hexadecimal number is defined as "the prefix 0x or 0X followed by a
86 // sequence of the decimal digits and the letters a (or A) through f (or F)
87 // with values 10 through 15 respectively." (C standard 6.4.4.1)
88 if (is_hex_start(src, src_len))
89 return 16;
90 // A binary number is defined as "the prefix 0b or 0B optionally followed
91 // by a sequence of letters and digits." (C standard 7.24.1.7)
92 if (is_binary_start(src, src_len))
93 return 2;
94 // An octal number is defined as "the prefix 0 optionally followed by a
95 // sequence of the digits 0 through 7 only" (C standard 6.4.4.1) and so any
96 // number that starts with 0, including just 0, is an octal number.
97 if (src_len > 0 && is_char_or_wchar(src[0], '0', L'0')) {
98 return 8;
99 }
100 // A decimal number is defined as beginning "with a nonzero digit and
101 // consist[ing] of a sequence of decimal digits." (C standard 6.4.4.1)
102 return 10;
103}
104
105// -----------------------------------------------------------------------------
106// **** WARNING ****
// This interface is shared with libc++. If you change this interface, you need
// to update it in both libc and libc++.
109// -----------------------------------------------------------------------------
110// Takes a pointer to a string and the base to convert to. This function is used
111// as the backend for all of the string to int functions.
112template <typename T, typename CharType>
113LIBC_INLINE constexpr StrToNumResult<T>
114strtointeger(const CharType *__restrict src, int base,
115 const size_t src_len = cpp::numeric_limits<size_t>::max()) {
116 using ResultType = make_integral_or_big_int_unsigned_t<T>;
117
118 if (src_len == 0)
119 return {0, 0, 0};
120
121 if (base < 0 || base == 1 || base > 36)
122 return {0, 0, EINVAL};
123
124 size_t src_cur = first_non_whitespace(src, src_len);
125 if (src_cur == src_len) {
126 return {0, 0, 0};
127 }
128
129 int sign = get_sign(src + src_cur);
130 bool is_positive = (sign >= 0);
131 src_cur += (sign != 0);
132
133 if (base == 0)
134 base = infer_base(src + src_cur, src_len - src_cur);
135
136 if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
137 src_cur = src_cur + 2;
138
139 if (base == 2 && is_binary_start(src + src_cur, src_len - src_cur))
140 src_cur = src_cur + 2;
141
142 constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
143 ResultType constexpr NEGATIVE_MAX =
144 !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
145 : cpp::numeric_limits<T>::max();
146 ResultType const abs_max =
147 (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
148 ResultType const abs_max_div_by_base =
149 abs_max / static_cast<ResultType>(base);
150
151 bool is_number = false;
152 int error_val = 0;
153 ResultType result = 0;
154 while (src_cur < src_len && isalnum(src[src_cur])) {
155 int cur_digit = b36_char_to_int(src[src_cur]);
156 if (cur_digit >= base)
157 break;
158
159 is_number = true;
160 ++src_cur;
161
162 // If the number has already hit the maximum value for the current type then
163 // the result cannot change, but we still need to advance src to the end of
164 // the number.
165 if (result == abs_max) {
166 error_val = ERANGE;
167 continue;
168 }
169
170 if (result > abs_max_div_by_base) {
171 result = abs_max;
172 error_val = ERANGE;
173 } else {
174 result = result * static_cast<ResultType>(base);
175 }
176 if (result > abs_max - static_cast<ResultType>(cur_digit)) {
177 result = abs_max;
178 error_val = ERANGE;
179 } else {
180 result = result + static_cast<ResultType>(cur_digit);
181 }
182 }
183
184 ptrdiff_t str_len = is_number ? static_cast<ptrdiff_t>(src_cur) : 0;
185
186 if (error_val == ERANGE) {
187 if (is_positive || IS_UNSIGNED)
188 return {cpp::numeric_limits<T>::max(), str_len, error_val};
189 else // T is signed and there is a negative overflow
190 return {cpp::numeric_limits<T>::min(), str_len, error_val};
191 }
192
193 return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
194}
195
196} // namespace internal
197} // namespace LIBC_NAMESPACE_DECL
198
199#endif // LLVM_LIBC_SRC___SUPPORT_STR_TO_INTEGER_H
200