//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/Builtins.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

/// Unevaluated builtins don't get their arguments put on the stack
/// automatically. They instead operate on the AST of their Call
/// Expression.
/// Similar information is available via ASTContext::BuiltinInfo,
/// but that is not correct for our use cases.
static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
  return BuiltinID == Builtin::BI__builtin_classify_type ||
         BuiltinID == Builtin::BI__builtin_os_log_format_buffer_size;
}
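
// Illustrative sketch (not part of the original file): for a call such as
//
//   constexpr int K = __builtin_classify_type(1.0);
//
// the argument is never evaluated, so no stack slots are set up for it;
// the opcode implementation inspects the CallExpr's AST directly instead.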

Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {
  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, thus (usually) requiring an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return value is not a primitive, a pointer to the storage in
  // which the value is initialized is passed as the first argument. See
  // 'RVO' elsewhere in the code.
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }
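
  // Illustrative sketch (not from the original file): given
  //   constexpr SomeStruct f();
  // where 'SomeStruct' has no primitive representation, the frame begins
  // with a hidden PT_Ptr parameter pointing at the caller-provided return
  // slot, yielding the layout [RVO ptr][this?][declared params...].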

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete; we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }
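
  // Illustrative sketch (not from the original file): for a lambda like
  //   [x, &y](int a) { return x + y + a; }
  // the closure record has one field per capture, and LambdaCaptures maps
  // each captured declaration to that field's byte offset, with IsPtr set
  // for the by-reference capture of 'y'.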

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    bool IsUnevaluatedBuiltin = false;
    if (unsigned BI = FuncDecl->getBuiltinID())
      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}

Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}
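
// Layout sketch (sizes below are assumptions for illustration only): each
// local is preceded by its inline Block header, and Location points at the
// payload just past it. Assuming, say, sizeof(Block) == 24 and an 8-byte
// allocation for the first local, the header occupies bytes 0..24,
// Location is 24, and NextLocalOffset advances to 32.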

void ByteCodeEmitter::emitLabel(LabelTy Label) {
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC offset which the jump is relative to.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If the target is known, compute the jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record a relocation and return a dummy offset.
  LabelRelocs[Label].push_back(Position);
  return 0;
}
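
// Worked example (a sketch; assumes align(sizeof(Opcode)) ==
// align(sizeof(int32_t)) == 4, which depends on the host): a jump emitted
// when Code.size() == 0 places its opcode at bytes 0..4 and a zero
// placeholder operand at bytes 4..8, and getOffset() records Position = 8,
// the PC just past the instruction. If emitLabel() later binds the label at
// Target = 20, it patches bytes 4..8 with Offset = 20 - 8 = 12. Offsets are
// thus relative to the first byte after the jump instruction.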

/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}
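
// Note (a sketch, not from the original file): emitting a pointer operand
// such as a 'const FunctionDecl *' therefore stores a 32-bit ID obtained
// via Program::getOrCreateNativePointer() rather than the raw host pointer,
// keeping operand encoding at a fixed width regardless of the host's
// pointer size.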

/// Emits a serializable value. These may contain heap-allocated memory and
/// aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &...Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  (..., emit(P, Code, Args, Success));
  return Success;
}

bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL