1 | //===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains the implementation of formatted_raw_ostream. |
10 | // |
11 | //===----------------------------------------------------------------------===// |
12 | |
13 | #include "llvm/Support/FormattedStream.h" |
14 | #include "llvm/Support/ConvertUTF.h" |
15 | #include "llvm/Support/Debug.h" |
16 | #include "llvm/Support/Unicode.h" |
17 | #include "llvm/Support/raw_ostream.h" |
18 | #include <algorithm> |
19 | |
20 | using namespace llvm; |
21 | |
22 | /// UpdatePosition - Examine the given char sequence and figure out which |
23 | /// column we end up in after output, and how many line breaks are contained. |
24 | /// This assumes that the input string is well-formed UTF-8, and takes into |
25 | /// account Unicode characters which render as multiple columns wide. |
26 | void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) { |
27 | unsigned &Column = Position.first; |
28 | unsigned &Line = Position.second; |
29 | |
30 | auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) { |
31 | int Width = sys::unicode::columnWidthUTF8(Text: CP); |
32 | if (Width != sys::unicode::ErrorNonPrintableCharacter) |
33 | Column += Width; |
34 | |
35 | // The only special whitespace characters we care about are single-byte. |
36 | if (CP.size() > 1) |
37 | return; |
38 | |
39 | switch (CP[0]) { |
40 | case '\n': |
41 | Line += 1; |
42 | [[fallthrough]]; |
43 | case '\r': |
44 | Column = 0; |
45 | break; |
46 | case '\t': |
47 | // Assumes tab stop = 8 characters. |
48 | Column += (8 - (Column & 0x7)) & 0x7; |
49 | break; |
50 | } |
51 | }; |
52 | |
53 | // If we have a partial UTF-8 sequence from the previous buffer, check that |
54 | // first. |
55 | if (PartialUTF8Char.size()) { |
56 | size_t BytesFromBuffer = |
57 | getNumBytesForUTF8(firstByte: PartialUTF8Char[0]) - PartialUTF8Char.size(); |
58 | if (Size < BytesFromBuffer) { |
59 | // If we still don't have enough bytes for a complete code point, just |
60 | // append what we have. |
61 | PartialUTF8Char.append(RHS: StringRef(Ptr, Size)); |
62 | return; |
63 | } else { |
64 | // The first few bytes from the buffer will complete the code point. |
65 | // Concatenate them and process their effect on the line and column |
66 | // numbers. |
67 | PartialUTF8Char.append(RHS: StringRef(Ptr, BytesFromBuffer)); |
68 | ProcessUTF8CodePoint(PartialUTF8Char); |
69 | PartialUTF8Char.clear(); |
70 | Ptr += BytesFromBuffer; |
71 | Size -= BytesFromBuffer; |
72 | } |
73 | } |
74 | |
75 | // Now scan the rest of the buffer. |
76 | unsigned NumBytes; |
77 | for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) { |
78 | // Fast path for printable ASCII characters without special handling. |
79 | if (*Ptr >= 0x20 && *Ptr <= 0x7e) { |
80 | NumBytes = 1; |
81 | ++Column; |
82 | continue; |
83 | } |
84 | |
85 | NumBytes = getNumBytesForUTF8(firstByte: *Ptr); |
86 | |
87 | // The buffer might end part way through a UTF-8 code unit sequence for a |
88 | // Unicode scalar value if it got flushed. If this happens, we can't know |
89 | // the display width until we see the rest of the code point. Stash the |
90 | // bytes we do have, so that we can reconstruct the whole code point later, |
91 | // even if the buffer is being flushed. |
92 | if ((unsigned)(End - Ptr) < NumBytes) { |
93 | PartialUTF8Char = StringRef(Ptr, End - Ptr); |
94 | return; |
95 | } |
96 | |
97 | ProcessUTF8CodePoint(StringRef(Ptr, NumBytes)); |
98 | } |
99 | } |
100 | |
101 | /// ComputePosition - Examine the current output and update line and column |
102 | /// counts. |
103 | void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) { |
104 | if (DisableScan) |
105 | return; |
106 | |
107 | // If our previous scan pointer is inside the buffer, assume we already |
108 | // scanned those bytes. This depends on raw_ostream to not change our buffer |
109 | // in unexpected ways. |
110 | if (Ptr <= Scanned && Scanned <= Ptr + Size) |
111 | // Scan all characters added since our last scan to determine the new |
112 | // column. |
113 | UpdatePosition(Ptr: Scanned, Size: Size - (Scanned - Ptr)); |
114 | else |
115 | UpdatePosition(Ptr, Size); |
116 | |
117 | // Update the scanning pointer. |
118 | Scanned = Ptr + Size; |
119 | } |
120 | |
121 | /// PadToColumn - Align the output to some column number. |
122 | /// |
123 | /// \param NewCol - The column to move to. |
124 | /// |
125 | formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { |
126 | // Figure out what's in the buffer and add it to the column count. |
127 | ComputePosition(Ptr: getBufferStart(), Size: GetNumBytesInBuffer()); |
128 | |
129 | // Output spaces until we reach the desired column. |
130 | indent(NumSpaces: std::max(a: int(NewCol - getColumn()), b: 1)); |
131 | return *this; |
132 | } |
133 | |
134 | void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { |
135 | // Figure out what's in the buffer and add it to the column count. |
136 | ComputePosition(Ptr, Size); |
137 | |
138 | // Write the data to the underlying stream (which is unbuffered, so |
139 | // the data will be immediately written out). |
140 | TheStream->write(Ptr, Size); |
141 | |
142 | // Reset the scanning pointer. |
143 | Scanned = nullptr; |
144 | } |
145 | |
146 | /// fouts() - This returns a reference to a formatted_raw_ostream for |
147 | /// standard output. Use it like: fouts() << "foo" << "bar"; |
148 | formatted_raw_ostream &llvm::fouts() { |
149 | static formatted_raw_ostream S(outs()); |
150 | return S; |
151 | } |
152 | |
153 | /// ferrs() - This returns a reference to a formatted_raw_ostream for |
154 | /// standard error. Use it like: ferrs() << "foo" << "bar"; |
155 | formatted_raw_ostream &llvm::ferrs() { |
156 | static formatted_raw_ostream S(errs()); |
157 | return S; |
158 | } |
159 | |
160 | /// fdbgs() - This returns a reference to a formatted_raw_ostream for |
161 | /// the debug stream. Use it like: fdbgs() << "foo" << "bar"; |
162 | formatted_raw_ostream &llvm::fdbgs() { |
163 | static formatted_raw_ostream S(dbgs()); |
164 | return S; |
165 | } |
166 | |