1 | //===-- NVPTXAsmPrinter.h - NVPTX LLVM assembly writer ----------*- C++ -*-===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This file contains a printer that converts from our internal representation |
10 | // of machine-dependent LLVM code to NVPTX assembly language. |
11 | // |
12 | //===----------------------------------------------------------------------===// |
13 | |
14 | #ifndef LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H |
15 | #define LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H |
16 | |
17 | #include "NVPTX.h" |
18 | #include "NVPTXSubtarget.h" |
19 | #include "NVPTXTargetMachine.h" |
20 | #include "llvm/ADT/DenseMap.h" |
21 | #include "llvm/ADT/SmallVector.h" |
22 | #include "llvm/ADT/StringRef.h" |
23 | #include "llvm/CodeGen/AsmPrinter.h" |
24 | #include "llvm/CodeGen/MachineFunction.h" |
25 | #include "llvm/CodeGen/MachineLoopInfo.h" |
26 | #include "llvm/IR/Constants.h" |
27 | #include "llvm/IR/DebugLoc.h" |
28 | #include "llvm/IR/DerivedTypes.h" |
29 | #include "llvm/IR/Function.h" |
30 | #include "llvm/IR/GlobalAlias.h" |
31 | #include "llvm/IR/GlobalValue.h" |
32 | #include "llvm/IR/Value.h" |
33 | #include "llvm/MC/MCExpr.h" |
34 | #include "llvm/MC/MCStreamer.h" |
35 | #include "llvm/MC/MCSymbol.h" |
36 | #include "llvm/Pass.h" |
37 | #include "llvm/Support/Casting.h" |
38 | #include "llvm/Support/Compiler.h" |
39 | #include "llvm/Support/ErrorHandling.h" |
40 | #include "llvm/Support/raw_ostream.h" |
41 | #include "llvm/Target/TargetMachine.h" |
42 | #include <algorithm> |
43 | #include <cassert> |
44 | #include <map> |
45 | #include <memory> |
46 | #include <string> |
47 | #include <vector> |
48 | |
// The PTX syntax and format are very different from what is usually seen in a
// .s file, therefore we are not able to use the MCAsmStreamer interface here.
52 | // |
53 | // We are handcrafting the output method here. |
54 | // |
55 | // A better approach is to clone the MCAsmStreamer to a MCPTXAsmStreamer |
56 | // (subclass of MCStreamer). |
57 | |
58 | namespace llvm { |
59 | |
60 | class MCOperand; |
61 | |
62 | class LLVM_LIBRARY_VISIBILITY NVPTXAsmPrinter : public AsmPrinter { |
63 | |
64 | class AggBuffer { |
65 | // Used to buffer the emitted string for initializing global aggregates. |
66 | // |
67 | // Normally an aggregate (array, vector, or structure) is emitted as a u8[]. |
68 | // However, if either element/field of the aggregate is a non-NULL address, |
69 | // and all such addresses are properly aligned, then the aggregate is |
70 | // emitted as u32[] or u64[]. In the case of unaligned addresses, the |
71 | // aggregate is emitted as u8[], and the mask() operator is used for all |
72 | // pointers. |
73 | // |
74 | // We first layout the aggregate in 'buffer' in bytes, except for those |
75 | // symbol addresses. For the i-th symbol address in the aggregate, its |
76 | // corresponding 4-byte or 8-byte elements in 'buffer' are filled with 0s. |
77 | // symbolPosInBuffer[i-1] records its position in 'buffer', and Symbols[i-1] |
78 | // records the Value*. |
79 | // |
80 | // Once we have this AggBuffer setup, we can choose how to print it out. |
81 | public: |
82 | // number of symbol addresses |
83 | unsigned numSymbols() const { return Symbols.size(); } |
84 | |
85 | bool allSymbolsAligned(unsigned ptrSize) const { |
86 | return llvm::all_of(Range: symbolPosInBuffer, |
87 | P: [=](unsigned pos) { return pos % ptrSize == 0; }); |
88 | } |
89 | |
90 | private: |
91 | const unsigned size; // size of the buffer in bytes |
92 | std::vector<unsigned char> buffer; // the buffer |
93 | SmallVector<unsigned, 4> symbolPosInBuffer; |
94 | SmallVector<const Value *, 4> Symbols; |
95 | // SymbolsBeforeStripping[i] is the original form of Symbols[i] before |
96 | // stripping pointer casts, i.e., |
97 | // Symbols[i] == SymbolsBeforeStripping[i]->stripPointerCasts(). |
98 | // |
99 | // We need to keep these values because AggBuffer::print decides whether to |
100 | // emit a "generic()" cast for Symbols[i] depending on the address space of |
101 | // SymbolsBeforeStripping[i]. |
102 | SmallVector<const Value *, 4> SymbolsBeforeStripping; |
103 | unsigned curpos; |
104 | NVPTXAsmPrinter &AP; |
105 | bool EmitGeneric; |
106 | |
107 | public: |
108 | AggBuffer(unsigned size, NVPTXAsmPrinter &AP) |
109 | : size(size), buffer(size), AP(AP) { |
110 | curpos = 0; |
111 | EmitGeneric = AP.EmitGeneric; |
112 | } |
113 | |
114 | // Copy Num bytes from Ptr. |
115 | // if Bytes > Num, zero fill up to Bytes. |
116 | unsigned addBytes(unsigned char *Ptr, int Num, int Bytes) { |
117 | assert((curpos + Num) <= size); |
118 | assert((curpos + Bytes) <= size); |
119 | for (int i = 0; i < Num; ++i) { |
120 | buffer[curpos] = Ptr[i]; |
121 | curpos++; |
122 | } |
123 | for (int i = Num; i < Bytes; ++i) { |
124 | buffer[curpos] = 0; |
125 | curpos++; |
126 | } |
127 | return curpos; |
128 | } |
129 | |
130 | unsigned addZeros(int Num) { |
131 | assert((curpos + Num) <= size); |
132 | for (int i = 0; i < Num; ++i) { |
133 | buffer[curpos] = 0; |
134 | curpos++; |
135 | } |
136 | return curpos; |
137 | } |
138 | |
139 | void addSymbol(const Value *GVar, const Value *GVarBeforeStripping) { |
140 | symbolPosInBuffer.push_back(Elt: curpos); |
141 | Symbols.push_back(Elt: GVar); |
142 | SymbolsBeforeStripping.push_back(Elt: GVarBeforeStripping); |
143 | } |
144 | |
145 | void printBytes(raw_ostream &os); |
146 | void printWords(raw_ostream &os); |
147 | |
148 | private: |
149 | void printSymbol(unsigned nSym, raw_ostream &os); |
150 | }; |
151 | |
152 | friend class AggBuffer; |
153 | |
154 | private: |
155 | StringRef getPassName() const override { return "NVPTX Assembly Printer" ; } |
156 | |
157 | const Function *F; |
158 | std::string CurrentFnName; |
159 | |
160 | void emitStartOfAsmFile(Module &M) override; |
161 | void emitBasicBlockStart(const MachineBasicBlock &MBB) override; |
162 | void emitFunctionEntryLabel() override; |
163 | void emitFunctionBodyStart() override; |
164 | void emitFunctionBodyEnd() override; |
165 | void emitImplicitDef(const MachineInstr *MI) const override; |
166 | |
167 | void emitInstruction(const MachineInstr *) override; |
168 | void lowerToMCInst(const MachineInstr *MI, MCInst &OutMI); |
169 | bool lowerOperand(const MachineOperand &MO, MCOperand &MCOp); |
170 | MCOperand GetSymbolRef(const MCSymbol *Symbol); |
171 | unsigned encodeVirtualRegister(unsigned Reg); |
172 | |
173 | void printMemOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O, |
174 | const char *Modifier = nullptr); |
175 | void printModuleLevelGV(const GlobalVariable *GVar, raw_ostream &O, |
176 | bool processDemoted, const NVPTXSubtarget &STI); |
177 | void emitGlobals(const Module &M); |
178 | void emitGlobalAlias(const Module &M, const GlobalAlias &GA) override; |
179 | void (Module &M, raw_ostream &O, const NVPTXSubtarget &STI); |
180 | void emitKernelFunctionDirectives(const Function &F, raw_ostream &O) const; |
181 | void emitVirtualRegister(unsigned int vr, raw_ostream &); |
182 | void emitFunctionParamList(const Function *, raw_ostream &O); |
183 | void setAndEmitFunctionVirtualRegisters(const MachineFunction &MF); |
184 | void printReturnValStr(const Function *, raw_ostream &O); |
185 | void printReturnValStr(const MachineFunction &MF, raw_ostream &O); |
186 | bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo, |
187 | const char *, raw_ostream &) override; |
188 | void printOperand(const MachineInstr *MI, unsigned OpNum, raw_ostream &O); |
189 | bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNo, |
190 | const char *, raw_ostream &) override; |
191 | |
192 | const MCExpr *lowerConstantForGV(const Constant *CV, bool ProcessingGeneric); |
193 | void printMCExpr(const MCExpr &Expr, raw_ostream &OS); |
194 | |
195 | protected: |
196 | bool doInitialization(Module &M) override; |
197 | bool doFinalization(Module &M) override; |
198 | |
199 | private: |
200 | bool GlobalsEmitted; |
201 | |
202 | // This is specific per MachineFunction. |
203 | const MachineRegisterInfo *MRI; |
204 | // The contents are specific for each |
205 | // MachineFunction. But the size of the |
206 | // array is not. |
207 | typedef DenseMap<unsigned, unsigned> VRegMap; |
208 | typedef DenseMap<const TargetRegisterClass *, VRegMap> VRegRCMap; |
209 | VRegRCMap VRegMapping; |
210 | |
211 | // List of variables demoted to a function scope. |
212 | std::map<const Function *, std::vector<const GlobalVariable *>> localDecls; |
213 | |
214 | void emitPTXGlobalVariable(const GlobalVariable *GVar, raw_ostream &O, |
215 | const NVPTXSubtarget &STI); |
216 | void emitPTXAddressSpace(unsigned int AddressSpace, raw_ostream &O) const; |
217 | std::string getPTXFundamentalTypeStr(Type *Ty, bool = true) const; |
218 | void printScalarConstant(const Constant *CPV, raw_ostream &O); |
219 | void printFPConstant(const ConstantFP *Fp, raw_ostream &O); |
220 | void bufferLEByte(const Constant *CPV, int Bytes, AggBuffer *aggBuffer); |
221 | void bufferAggregateConstant(const Constant *CV, AggBuffer *aggBuffer); |
222 | |
223 | void emitLinkageDirective(const GlobalValue *V, raw_ostream &O); |
224 | void emitDeclarations(const Module &, raw_ostream &O); |
225 | void emitDeclaration(const Function *, raw_ostream &O); |
226 | void emitAliasDeclaration(const GlobalAlias *, raw_ostream &O); |
227 | void emitDeclarationWithName(const Function *, MCSymbol *, raw_ostream &O); |
228 | void emitDemotedVars(const Function *, raw_ostream &); |
229 | |
230 | bool lowerImageHandleOperand(const MachineInstr *MI, unsigned OpNo, |
231 | MCOperand &MCOp); |
232 | void lowerImageHandleSymbol(unsigned Index, MCOperand &MCOp); |
233 | |
234 | bool (const MachineBasicBlock &MBB) const; |
235 | |
236 | // Used to control the need to emit .generic() in the initializer of |
237 | // module scope variables. |
238 | // Although ptx supports the hybrid mode like the following, |
239 | // .global .u32 a; |
240 | // .global .u32 b; |
241 | // .global .u32 addr[] = {a, generic(b)} |
242 | // we have difficulty representing the difference in the NVVM IR. |
243 | // |
244 | // Since the address value should always be generic in CUDA C and always |
245 | // be specific in OpenCL, we use this simple control here. |
246 | // |
247 | bool EmitGeneric; |
248 | |
249 | public: |
250 | NVPTXAsmPrinter(TargetMachine &TM, std::unique_ptr<MCStreamer> Streamer) |
251 | : AsmPrinter(TM, std::move(Streamer)), |
252 | EmitGeneric(static_cast<NVPTXTargetMachine &>(TM).getDrvInterface() == |
253 | NVPTX::CUDA) {} |
254 | |
255 | bool runOnMachineFunction(MachineFunction &F) override; |
256 | |
257 | void getAnalysisUsage(AnalysisUsage &AU) const override { |
258 | AU.addRequired<MachineLoopInfoWrapperPass>(); |
259 | AsmPrinter::getAnalysisUsage(AU); |
260 | } |
261 | |
262 | std::string getVirtualRegisterName(unsigned) const; |
263 | |
264 | const MCSymbol *getFunctionFrameSymbol() const override; |
265 | |
266 | // Make emitGlobalVariable() no-op for NVPTX. |
267 | // Global variables have been already emitted by the time the base AsmPrinter |
268 | // attempts to do so in doFinalization() (see NVPTXAsmPrinter::emitGlobals()). |
269 | void emitGlobalVariable(const GlobalVariable *GV) override {} |
270 | }; |
271 | |
272 | } // end namespace llvm |
273 | |
274 | #endif // LLVM_LIB_TARGET_NVPTX_NVPTXASMPRINTER_H |
275 | |