| 1 | // -*- C++ -*- |
| 2 | //===----------------------------------------------------------------------===// |
| 3 | // |
| 4 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 5 | // See https://llvm.org/LICENSE.txt for license information. |
| 6 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 7 | // |
| 8 | //===----------------------------------------------------------------------===// |
| 9 | |
| 10 | #ifndef _LIBCPP___FORMAT_WRITE_ESCAPED_H |
| 11 | #define _LIBCPP___FORMAT_WRITE_ESCAPED_H |
| 12 | |
| 13 | #include <__algorithm/ranges_copy.h> |
| 14 | #include <__algorithm/ranges_for_each.h> |
| 15 | #include <__charconv/to_chars_integral.h> |
| 16 | #include <__charconv/to_chars_result.h> |
| 17 | #include <__chrono/statically_widen.h> |
| 18 | #include <__format/escaped_output_table.h> |
| 19 | #include <__format/extended_grapheme_cluster_table.h> |
| 20 | #include <__format/formatter_output.h> |
| 21 | #include <__format/parser_std_format_spec.h> |
| 22 | #include <__format/unicode.h> |
| 23 | #include <__iterator/back_insert_iterator.h> |
| 24 | #include <__memory/addressof.h> |
| 25 | #include <__system_error/errc.h> |
| 26 | #include <__type_traits/make_unsigned.h> |
| 27 | #include <__utility/move.h> |
| 28 | #include <string_view> |
| 29 | |
| 30 | #if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) |
| 31 | # pragma GCC system_header |
| 32 | #endif |
| 33 | |
| 34 | _LIBCPP_PUSH_MACROS |
| 35 | #include <__undef_macros> |
| 36 | |
| 37 | _LIBCPP_BEGIN_NAMESPACE_STD |
| 38 | |
| 39 | namespace __formatter { |
| 40 | |
| 41 | #if _LIBCPP_STD_VER >= 20 |
| 42 | |
| 43 | /// Writes a string using format's width estimation algorithm. |
| 44 | /// |
| 45 | /// \note When \c _LIBCPP_HAS_UNICODE is false the function assumes the input is ASCII. |
| 46 | template <class _CharT> |
| 47 | _LIBCPP_HIDE_FROM_ABI auto |
| 48 | __write_string(basic_string_view<_CharT> __str, |
| 49 | output_iterator<const _CharT&> auto __out_it, |
| 50 | __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { |
| 51 | if (!__specs.__has_precision()) |
| 52 | return __formatter::__write_string_no_precision(__str, std::move(__out_it), __specs); |
| 53 | |
| 54 | int __size = __formatter::__truncate(__str, __specs.__precision_); |
| 55 | |
| 56 | return __formatter::__write(__str.begin(), __str.end(), std::move(__out_it), __specs, __size); |
| 57 | } |
| 58 | |
| 59 | #endif // _LIBCPP_STD_VER >= 20 |
| 60 | #if _LIBCPP_STD_VER >= 23 |
| 61 | |
| 62 | struct __nul_terminator {}; |
| 63 | |
| 64 | template <class _CharT> |
| 65 | _LIBCPP_HIDE_FROM_ABI bool operator==(const _CharT* __cstr, __nul_terminator) { |
| 66 | return *__cstr == _CharT('\0'); |
| 67 | } |
| 68 | |
| 69 | template <class _CharT> |
| 70 | _LIBCPP_HIDE_FROM_ABI void |
| 71 | __write_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value, const _CharT* __prefix) { |
| 72 | back_insert_iterator __out_it{__str}; |
| 73 | std::ranges::copy(__prefix, __nul_terminator{}, __out_it); |
| 74 | |
| 75 | char __buffer[8]; |
| 76 | to_chars_result __r = std::to_chars(first: std::begin(array&: __buffer), last: std::end(array&: __buffer), __value, base: 16); |
| 77 | _LIBCPP_ASSERT_INTERNAL(__r.ec == errc(0), "Internal buffer too small" ); |
| 78 | std::ranges::copy(std::begin(array&: __buffer), __r.ptr, __out_it); |
| 79 | |
| 80 | __str += _CharT('}'); |
| 81 | } |
| 82 | |
| 83 | // [format.string.escaped]/2.2.1.2 |
| 84 | // ... |
| 85 | // then the sequence \u{hex-digit-sequence} is appended to E, where |
| 86 | // hex-digit-sequence is the shortest hexadecimal representation of C using |
| 87 | // lower-case hexadecimal digits. |
| 88 | template <class _CharT> |
| 89 | _LIBCPP_HIDE_FROM_ABI void __write_well_formed_escaped_code_unit(basic_string<_CharT>& __str, char32_t __value) { |
| 90 | __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\u{" )); |
| 91 | } |
| 92 | |
| 93 | // [format.string.escaped]/2.2.3 |
| 94 | // Otherwise (X is a sequence of ill-formed code units), each code unit U is |
| 95 | // appended to E in order as the sequence \x{hex-digit-sequence}, where |
| 96 | // hex-digit-sequence is the shortest hexadecimal representation of U using |
| 97 | // lower-case hexadecimal digits. |
| 98 | template <class _CharT> |
| 99 | _LIBCPP_HIDE_FROM_ABI void __write_escape_ill_formed_code_unit(basic_string<_CharT>& __str, char32_t __value) { |
| 100 | __formatter::__write_escaped_code_unit(__str, __value, _LIBCPP_STATICALLY_WIDEN(_CharT, "\\x{" )); |
| 101 | } |
| 102 | |
| 103 | template <class _CharT> |
| 104 | [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool |
| 105 | __is_escaped_sequence_written(basic_string<_CharT>& __str, bool __last_escaped, char32_t __value) { |
| 106 | # if !_LIBCPP_HAS_UNICODE |
| 107 | // For ASCII assume everything above 127 is printable. |
| 108 | if (__value > 127) |
| 109 | return false; |
| 110 | # endif |
| 111 | |
| 112 | // [format.string.escaped]/2.2.1.2.1 |
| 113 | // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar |
| 114 | // value whose Unicode property General_Category has a value in the groups |
| 115 | // Separator (Z) or Other (C), as described by UAX #44 of the Unicode Standard, |
| 116 | if (!__escaped_output_table::__needs_escape(code_point: __value)) |
| 117 | // [format.string.escaped]/2.2.1.2.2 |
| 118 | // CE is UTF-8, UTF-16, or UTF-32 and C corresponds to a Unicode scalar |
| 119 | // value with the Unicode property Grapheme_Extend=Yes as described by UAX |
| 120 | // #44 of the Unicode Standard and C is not immediately preceded in S by a |
| 121 | // character P appended to E without translation to an escape sequence, |
| 122 | if (!__last_escaped || __extended_grapheme_custer_property_boundary::__get_property(code_point: __value) != |
| 123 | __extended_grapheme_custer_property_boundary::__property::__Extend) |
| 124 | return false; |
| 125 | |
| 126 | __formatter::__write_well_formed_escaped_code_unit(__str, __value); |
| 127 | return true; |
| 128 | } |
| 129 | |
| 130 | template <class _CharT> |
| 131 | [[nodiscard]] _LIBCPP_HIDE_FROM_ABI constexpr char32_t __to_char32(_CharT __value) { |
| 132 | return static_cast<make_unsigned_t<_CharT>>(__value); |
| 133 | } |
| 134 | |
| 135 | enum class __escape_quotation_mark { __apostrophe, __double_quote }; |
| 136 | |
| 137 | // [format.string.escaped]/2 |
| 138 | template <class _CharT> |
| 139 | [[nodiscard]] _LIBCPP_HIDE_FROM_ABI bool __is_escaped_sequence_written( |
| 140 | basic_string<_CharT>& __str, char32_t __value, bool __last_escaped, __escape_quotation_mark __mark) { |
| 141 | // 2.2.1.1 - Mapped character in [tab:format.escape.sequences] |
| 142 | switch (__value) { |
| 143 | case _CharT('\t'): |
| 144 | __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\t" ); |
| 145 | return true; |
| 146 | case _CharT('\n'): |
| 147 | __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\n" ); |
| 148 | return true; |
| 149 | case _CharT('\r'): |
| 150 | __str += _LIBCPP_STATICALLY_WIDEN(_CharT, "\\r" ); |
| 151 | return true; |
| 152 | case _CharT('\''): |
| 153 | if (__mark == __escape_quotation_mark::__apostrophe) |
| 154 | __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\')" ); |
| 155 | else |
| 156 | __str += __value; |
| 157 | return true; |
| 158 | case _CharT('"'): |
| 159 | if (__mark == __escape_quotation_mark::__double_quote) |
| 160 | __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\")" ); |
| 161 | else |
| 162 | __str += __value; |
| 163 | return true; |
| 164 | case _CharT('\\'): |
| 165 | __str += _LIBCPP_STATICALLY_WIDEN(_CharT, R"(\\)" ); |
| 166 | return true; |
| 167 | |
| 168 | // 2.2.1.2 - Space |
| 169 | case _CharT(' '): |
| 170 | __str += __value; |
| 171 | return true; |
| 172 | } |
| 173 | |
| 174 | // 2.2.2 |
| 175 | // Otherwise, if X is a shift sequence, the effect on E and further |
| 176 | // decoding of S is unspecified. |
| 177 | // For now shift sequences are ignored and treated as Unicode. Other parts |
| 178 | // of the format library do the same. It's unknown how ostream treats them. |
| 179 | // TODO FMT determine what to do with shift sequences. |
| 180 | |
| 181 | // 2.2.1.2.1 and 2.2.1.2.2 - Escape |
| 182 | return __formatter::__is_escaped_sequence_written(__str, __last_escaped, __formatter::__to_char32(__value)); |
| 183 | } |
| 184 | |
| 185 | template <class _CharT> |
| 186 | _LIBCPP_HIDE_FROM_ABI void |
| 187 | __escape(basic_string<_CharT>& __str, basic_string_view<_CharT> __values, __escape_quotation_mark __mark) { |
| 188 | __unicode::__code_point_view<_CharT> __view{__values.begin(), __values.end()}; |
| 189 | |
| 190 | // When the first code unit has the property Grapheme_Extend=Yes it needs to |
| 191 | // be escaped. This happens when the previous code unit was also escaped. |
| 192 | bool __escape = true; |
| 193 | while (!__view.__at_end()) { |
| 194 | auto __first = __view.__position(); |
| 195 | typename __unicode::__consume_result __result = __view.__consume(); |
| 196 | if (__result.__status == __unicode::__consume_result::__ok) { |
| 197 | __escape = __formatter::__is_escaped_sequence_written(__str, __result.__code_point, __escape, __mark); |
| 198 | if (!__escape) |
| 199 | // 2.2.1.3 - Add the character |
| 200 | ranges::copy(__first, __view.__position(), std::back_insert_iterator(__str)); |
| 201 | } else { |
| 202 | // 2.2.3 sequence of ill-formed code units |
| 203 | ranges::for_each(__first, __view.__position(), [&](_CharT __value) { |
| 204 | __formatter::__write_escape_ill_formed_code_unit(__str, __formatter::__to_char32(__value)); |
| 205 | }); |
| 206 | } |
| 207 | } |
| 208 | } |
| 209 | |
| 210 | template <class _CharT> |
| 211 | _LIBCPP_HIDE_FROM_ABI auto |
| 212 | __format_escaped_char(_CharT __value, |
| 213 | output_iterator<const _CharT&> auto __out_it, |
| 214 | __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { |
| 215 | basic_string<_CharT> __str; |
| 216 | __str += _CharT('\''); |
| 217 | __formatter::__escape(__str, basic_string_view{std::addressof(__value), 1}, __escape_quotation_mark::__apostrophe); |
| 218 | __str += _CharT('\''); |
| 219 | return __formatter::__write(__str.data(), __str.data() + __str.size(), std::move(__out_it), __specs, __str.size()); |
| 220 | } |
| 221 | |
| 222 | template <class _CharT> |
| 223 | _LIBCPP_HIDE_FROM_ABI auto |
| 224 | __format_escaped_string(basic_string_view<_CharT> __values, |
| 225 | output_iterator<const _CharT&> auto __out_it, |
| 226 | __format_spec::__parsed_specifications<_CharT> __specs) -> decltype(__out_it) { |
| 227 | basic_string<_CharT> __str; |
| 228 | __str += _CharT('"'); |
| 229 | __formatter::__escape(__str, __values, __escape_quotation_mark::__double_quote); |
| 230 | __str += _CharT('"'); |
| 231 | return __formatter::__write_string(basic_string_view{__str}, std::move(__out_it), __specs); |
| 232 | } |
| 233 | |
| 234 | #endif // _LIBCPP_STD_VER >= 23 |
| 235 | |
| 236 | } // namespace __formatter |
| 237 | |
| 238 | _LIBCPP_END_NAMESPACE_STD |
| 239 | |
| 240 | _LIBCPP_POP_MACROS |
| 241 | |
| 242 | #endif // _LIBCPP___FORMAT_WRITE_ESCAPED_H |
| 243 | |