| 1 | //===-- llvm/Support/FormattedStream.cpp - Formatted streams ----*- C++ -*-===// |
| 2 | // |
| 3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| 4 | // See https://llvm.org/LICENSE.txt for license information. |
| 5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | // |
| 7 | //===----------------------------------------------------------------------===// |
| 8 | // |
| 9 | // This file contains the implementation of formatted_raw_ostream. |
| 10 | // |
| 11 | //===----------------------------------------------------------------------===// |
| 12 | |
| 13 | #include "llvm/Support/FormattedStream.h" |
| 14 | #include "llvm/Support/ConvertUTF.h" |
| 15 | #include "llvm/Support/Debug.h" |
| 16 | #include "llvm/Support/Unicode.h" |
| 17 | #include "llvm/Support/raw_ostream.h" |
| 18 | #include <algorithm> |
| 19 | |
| 20 | using namespace llvm; |
| 21 | |
| 22 | /// UpdatePosition - Examine the given char sequence and figure out which |
| 23 | /// column we end up in after output, and how many line breaks are contained. |
| 24 | /// This assumes that the input string is well-formed UTF-8, and takes into |
| 25 | /// account Unicode characters which render as multiple columns wide. |
| 26 | void formatted_raw_ostream::UpdatePosition(const char *Ptr, size_t Size) { |
| 27 | unsigned &Column = Position.first; |
| 28 | unsigned &Line = Position.second; |
| 29 | |
| 30 | auto ProcessUTF8CodePoint = [&Line, &Column](StringRef CP) { |
| 31 | int Width = sys::unicode::columnWidthUTF8(Text: CP); |
| 32 | if (Width != sys::unicode::ErrorNonPrintableCharacter) |
| 33 | Column += Width; |
| 34 | |
| 35 | // The only special whitespace characters we care about are single-byte. |
| 36 | if (CP.size() > 1) |
| 37 | return; |
| 38 | |
| 39 | switch (CP[0]) { |
| 40 | case '\n': |
| 41 | Line += 1; |
| 42 | [[fallthrough]]; |
| 43 | case '\r': |
| 44 | Column = 0; |
| 45 | break; |
| 46 | case '\t': |
| 47 | // Assumes tab stop = 8 characters. |
| 48 | Column += (8 - (Column & 0x7)) & 0x7; |
| 49 | break; |
| 50 | } |
| 51 | }; |
| 52 | |
| 53 | // If we have a partial UTF-8 sequence from the previous buffer, check that |
| 54 | // first. |
| 55 | if (PartialUTF8Char.size()) { |
| 56 | size_t BytesFromBuffer = |
| 57 | getNumBytesForUTF8(firstByte: PartialUTF8Char[0]) - PartialUTF8Char.size(); |
| 58 | if (Size < BytesFromBuffer) { |
| 59 | // If we still don't have enough bytes for a complete code point, just |
| 60 | // append what we have. |
| 61 | PartialUTF8Char.append(RHS: StringRef(Ptr, Size)); |
| 62 | return; |
| 63 | } else { |
| 64 | // The first few bytes from the buffer will complete the code point. |
| 65 | // Concatenate them and process their effect on the line and column |
| 66 | // numbers. |
| 67 | PartialUTF8Char.append(RHS: StringRef(Ptr, BytesFromBuffer)); |
| 68 | ProcessUTF8CodePoint(PartialUTF8Char); |
| 69 | PartialUTF8Char.clear(); |
| 70 | Ptr += BytesFromBuffer; |
| 71 | Size -= BytesFromBuffer; |
| 72 | } |
| 73 | } |
| 74 | |
| 75 | // Now scan the rest of the buffer. |
| 76 | unsigned NumBytes; |
| 77 | for (const char *End = Ptr + Size; Ptr < End; Ptr += NumBytes) { |
| 78 | // Fast path for printable ASCII characters without special handling. |
| 79 | if (*Ptr >= 0x20 && *Ptr <= 0x7e) { |
| 80 | NumBytes = 1; |
| 81 | ++Column; |
| 82 | continue; |
| 83 | } |
| 84 | |
| 85 | NumBytes = getNumBytesForUTF8(firstByte: *Ptr); |
| 86 | |
| 87 | // The buffer might end part way through a UTF-8 code unit sequence for a |
| 88 | // Unicode scalar value if it got flushed. If this happens, we can't know |
| 89 | // the display width until we see the rest of the code point. Stash the |
| 90 | // bytes we do have, so that we can reconstruct the whole code point later, |
| 91 | // even if the buffer is being flushed. |
| 92 | if ((unsigned)(End - Ptr) < NumBytes) { |
| 93 | PartialUTF8Char = StringRef(Ptr, End - Ptr); |
| 94 | return; |
| 95 | } |
| 96 | |
| 97 | ProcessUTF8CodePoint(StringRef(Ptr, NumBytes)); |
| 98 | } |
| 99 | } |
| 100 | |
| 101 | /// ComputePosition - Examine the current output and update line and column |
| 102 | /// counts. |
| 103 | void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) { |
| 104 | if (DisableScan) |
| 105 | return; |
| 106 | |
| 107 | // If our previous scan pointer is inside the buffer, assume we already |
| 108 | // scanned those bytes. This depends on raw_ostream to not change our buffer |
| 109 | // in unexpected ways. |
| 110 | if (Ptr <= Scanned && Scanned <= Ptr + Size) |
| 111 | // Scan all characters added since our last scan to determine the new |
| 112 | // column. |
| 113 | UpdatePosition(Ptr: Scanned, Size: Size - (Scanned - Ptr)); |
| 114 | else |
| 115 | UpdatePosition(Ptr, Size); |
| 116 | |
| 117 | // Update the scanning pointer. |
| 118 | Scanned = Ptr + Size; |
| 119 | } |
| 120 | |
| 121 | /// PadToColumn - Align the output to some column number. |
| 122 | /// |
| 123 | /// \param NewCol - The column to move to. |
| 124 | /// |
| 125 | formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { |
| 126 | // Figure out what's in the buffer and add it to the column count. |
| 127 | ComputePosition(Ptr: getBufferStart(), Size: GetNumBytesInBuffer()); |
| 128 | |
| 129 | // Output spaces until we reach the desired column. |
| 130 | indent(NumSpaces: std::max(a: int(NewCol - getColumn()), b: 1)); |
| 131 | return *this; |
| 132 | } |
| 133 | |
| 134 | void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { |
| 135 | // Figure out what's in the buffer and add it to the column count. |
| 136 | ComputePosition(Ptr, Size); |
| 137 | |
| 138 | // Write the data to the underlying stream (which is unbuffered, so |
| 139 | // the data will be immediately written out). |
| 140 | TheStream->write(Ptr, Size); |
| 141 | |
| 142 | // Reset the scanning pointer. |
| 143 | Scanned = nullptr; |
| 144 | } |
| 145 | |
| 146 | /// fouts() - This returns a reference to a formatted_raw_ostream for |
| 147 | /// standard output. Use it like: fouts() << "foo" << "bar"; |
| 148 | formatted_raw_ostream &llvm::fouts() { |
| 149 | static formatted_raw_ostream S(outs()); |
| 150 | return S; |
| 151 | } |
| 152 | |
| 153 | /// ferrs() - This returns a reference to a formatted_raw_ostream for |
| 154 | /// standard error. Use it like: ferrs() << "foo" << "bar"; |
| 155 | formatted_raw_ostream &llvm::ferrs() { |
| 156 | static formatted_raw_ostream S(errs()); |
| 157 | return S; |
| 158 | } |
| 159 | |
| 160 | /// fdbgs() - This returns a reference to a formatted_raw_ostream for |
| 161 | /// the debug stream. Use it like: fdbgs() << "foo" << "bar"; |
| 162 | formatted_raw_ostream &llvm::fdbgs() { |
| 163 | static formatted_raw_ostream S(dbgs()); |
| 164 | return S; |
| 165 | } |
| 166 | |