//===--- ByteCodeEmitter.cpp - Instruction emitter for the VM ---*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ByteCodeEmitter.h"
#include "Context.h"
#include "Floating.h"
#include "IntegralAP.h"
#include "Opcode.h"
#include "Program.h"
#include "clang/AST/ASTLambda.h"
#include "clang/AST/Attr.h"
#include "clang/AST/DeclCXX.h"
#include "clang/Basic/Builtins.h"
#include <type_traits>

using namespace clang;
using namespace clang::interp;

/// Unevaluated builtins don't get their arguments put on the stack
/// automatically. They instead operate on the AST of their Call
/// Expression.
/// Similar information is available via ASTContext::BuiltinInfo,
/// but that is not correct for our use cases.
static bool isUnevaluatedBuiltin(unsigned BuiltinID) {
  return BuiltinID == Builtin::BI__builtin_classify_type ||
         BuiltinID == Builtin::BI__builtin_os_log_format_buffer_size;
}

Function *ByteCodeEmitter::compileFunc(const FunctionDecl *FuncDecl) {

  // Manually created functions that haven't been assigned proper
  // parameters yet.
  if (!FuncDecl->param_empty() && !FuncDecl->param_begin())
    return nullptr;

  bool IsLambdaStaticInvoker = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl);
      MD && MD->isLambdaStaticInvoker()) {
    // For a lambda static invoker, we might have to pick a specialized
    // version if the lambda is generic. In that case, the picked function
    // will *NOT* be a static invoker anymore. However, it will still
    // be a non-static member function, thus (usually) requiring an
    // instance pointer. We suppress that later in this function.
    IsLambdaStaticInvoker = true;

    const CXXRecordDecl *ClosureClass = MD->getParent();
    assert(ClosureClass->captures_begin() == ClosureClass->captures_end());
    if (ClosureClass->isGenericLambda()) {
      const CXXMethodDecl *LambdaCallOp = ClosureClass->getLambdaCallOperator();
      assert(MD->isFunctionTemplateSpecialization() &&
             "A generic lambda's static-invoker function must be a "
             "template specialization");
      const TemplateArgumentList *TAL = MD->getTemplateSpecializationArgs();
      FunctionTemplateDecl *CallOpTemplate =
          LambdaCallOp->getDescribedFunctionTemplate();
      void *InsertPos = nullptr;
      const FunctionDecl *CorrespondingCallOpSpecialization =
          CallOpTemplate->findSpecialization(TAL->asArray(), InsertPos);
      assert(CorrespondingCallOpSpecialization);
      FuncDecl = cast<CXXMethodDecl>(CorrespondingCallOpSpecialization);
    }
  }

  // Set up argument indices.
  unsigned ParamOffset = 0;
  SmallVector<PrimType, 8> ParamTypes;
  SmallVector<unsigned, 8> ParamOffsets;
  llvm::DenseMap<unsigned, Function::ParamDescriptor> ParamDescriptors;

  // If the return is not a primitive, a pointer to the storage where the
  // value is initialized is passed as the first argument. See 'RVO'
  // elsewhere in the code.
  QualType Ty = FuncDecl->getReturnType();
  bool HasRVO = false;
  if (!Ty->isVoidType() && !Ctx.classify(Ty)) {
    HasRVO = true;
    ParamTypes.push_back(PT_Ptr);
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT_Ptr));
  }

  // If the function decl is a member decl, the next parameter is
  // the 'this' pointer. This parameter is pop()ed from the
  // InterpStack when calling the function.
  bool HasThisPointer = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl)) {
    if (!IsLambdaStaticInvoker) {
      HasThisPointer = MD->isInstance();
      if (MD->isImplicitObjectMemberFunction()) {
        ParamTypes.push_back(PT_Ptr);
        ParamOffsets.push_back(ParamOffset);
        ParamOffset += align(primSize(PT_Ptr));
      }
    }

    // Set up lambda capture to closure record field mapping.
    if (isLambdaCallOperator(MD)) {
      // The parent record needs to be complete; we need to know about all
      // the lambda captures.
      if (!MD->getParent()->isCompleteDefinition())
        return nullptr;

      const Record *R = P.getOrCreateRecord(MD->getParent());
      llvm::DenseMap<const ValueDecl *, FieldDecl *> LC;
      FieldDecl *LTC;

      MD->getParent()->getCaptureFields(LC, LTC);

      for (auto Cap : LC) {
        // Static lambdas cannot have any captures. If this one does,
        // it has already been diagnosed and we can only ignore it.
        if (MD->isStatic())
          return nullptr;

        unsigned Offset = R->getField(Cap.second)->Offset;
        this->LambdaCaptures[Cap.first] = {
            Offset, Cap.second->getType()->isReferenceType()};
      }
      if (LTC) {
        QualType CaptureType = R->getField(LTC)->Decl->getType();
        this->LambdaThisCapture = {R->getField(LTC)->Offset,
                                   CaptureType->isReferenceType() ||
                                       CaptureType->isPointerType()};
      }
    }
  }

  // Assign descriptors to all parameters.
  // Composite objects are lowered to pointers.
  for (const ParmVarDecl *PD : FuncDecl->parameters()) {
    std::optional<PrimType> T = Ctx.classify(PD->getType());
    PrimType PT = T.value_or(PT_Ptr);
    Descriptor *Desc = P.createDescriptor(PD, PT);
    ParamDescriptors.insert({ParamOffset, {PT, Desc}});
    Params.insert({PD, {ParamOffset, T != std::nullopt}});
    ParamOffsets.push_back(ParamOffset);
    ParamOffset += align(primSize(PT));
    ParamTypes.push_back(PT);
  }

  // Create a handle over the emitted code.
  Function *Func = P.getFunction(FuncDecl);
  if (!Func) {
    bool IsUnevaluatedBuiltin = false;
    if (unsigned BI = FuncDecl->getBuiltinID())
      IsUnevaluatedBuiltin = isUnevaluatedBuiltin(BI);

    Func =
        P.createFunction(FuncDecl, ParamOffset, std::move(ParamTypes),
                         std::move(ParamDescriptors), std::move(ParamOffsets),
                         HasThisPointer, HasRVO, IsUnevaluatedBuiltin);
  }

  assert(Func);
  // For not-yet-defined functions, we only create a Function instance and
  // compile their body later.
  if (!FuncDecl->isDefined() ||
      (FuncDecl->willHaveBody() && !FuncDecl->hasBody())) {
    Func->setDefined(false);
    return Func;
  }

  Func->setDefined(true);

  // Lambda static invokers are a special case that we emit custom code for.
  bool IsEligibleForCompilation = false;
  if (const auto *MD = dyn_cast<CXXMethodDecl>(FuncDecl))
    IsEligibleForCompilation = MD->isLambdaStaticInvoker();
  if (!IsEligibleForCompilation)
    IsEligibleForCompilation =
        FuncDecl->isConstexpr() || FuncDecl->hasAttr<MSConstexprAttr>();

  // Compile the function body.
  if (!IsEligibleForCompilation || !visitFunc(FuncDecl)) {
    Func->setIsFullyCompiled(true);
    return Func;
  }

  // Create scopes from descriptors.
  llvm::SmallVector<Scope, 2> Scopes;
  for (auto &DS : Descriptors) {
    Scopes.emplace_back(std::move(DS));
  }

  // Set the function's code.
  Func->setCode(NextLocalOffset, std::move(Code), std::move(SrcMap),
                std::move(Scopes), FuncDecl->hasBody());
  Func->setIsFullyCompiled(true);
  return Func;
}

Scope::Local ByteCodeEmitter::createLocal(Descriptor *D) {
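  // Every local is preceded by an inline Block header; reserve room for it,
  // then hand out the offset of the object's own storage.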
  NextLocalOffset += sizeof(Block);
  unsigned Location = NextLocalOffset;
  NextLocalOffset += align(D->getAllocSize());
  return {Location, D};
}

void ByteCodeEmitter::emitLabel(LabelTy Label) {
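  // Bind the label to the current end of the bytecode and patch any jumps to
  // it that were emitted before its position was known.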
  const size_t Target = Code.size();
  LabelOffsets.insert({Label, Target});

  if (auto It = LabelRelocs.find(Label); It != LabelRelocs.end()) {
    for (unsigned Reloc : It->second) {
      using namespace llvm::support;

      // Rewrite the operand of all jumps to this label.
      void *Location = Code.data() + Reloc - align(sizeof(int32_t));
      assert(aligned(Location));
      const int32_t Offset = Target - static_cast<int64_t>(Reloc);
      endian::write<int32_t, llvm::endianness::native>(Location, Offset);
    }
    LabelRelocs.erase(It);
  }
}

int32_t ByteCodeEmitter::getOffset(LabelTy Label) {
  // Compute the PC which the jump is relative to: the position right after
  // the opcode and its 32-bit operand have been read, which is where the
  // offset is applied.
  const int64_t Position =
      Code.size() + align(sizeof(Opcode)) + align(sizeof(int32_t));
  assert(aligned(Position));

  // If the target is known, compute the jump offset.
  if (auto It = LabelOffsets.find(Label); It != LabelOffsets.end())
    return It->second - Position;

  // Otherwise, record a relocation and return a dummy offset.
  LabelRelocs[Label].push_back(Position);
  return 0ull;
}

/// Helper to write bytecode and bail out if 32-bit offsets become invalid.
/// Pointers will be automatically marshalled as 32-bit IDs.
template <typename T>
static void emit(Program &P, std::vector<std::byte> &Code, const T &Val,
                 bool &Success) {
  size_t Size;

  if constexpr (std::is_pointer_v<T>)
    Size = sizeof(uint32_t);
  else
    Size = sizeof(T);

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  if constexpr (!std::is_pointer_v<T>) {
    new (Code.data() + ValPos) T(Val);
  } else {
    uint32_t ID = P.getOrCreateNativePointer(Val);
    new (Code.data() + ValPos) uint32_t(ID);
  }
}

/// Emits a serializable value. These usually (potentially) contain
/// heap-allocated memory and aren't trivially copyable.
template <typename T>
static void emitSerialized(std::vector<std::byte> &Code, const T &Val,
                           bool &Success) {
  size_t Size = Val.bytesToSerialize();

  if (Code.size() + Size > std::numeric_limits<unsigned>::max()) {
    Success = false;
    return;
  }

  // Access must be aligned!
  size_t ValPos = align(Code.size());
  Size = align(Size);
  assert(aligned(ValPos + Size));
  Code.resize(ValPos + Size);

  Val.serialize(Code.data() + ValPos);
}

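// Floating and IntegralAP values may own heap-allocated memory and aren't
// trivially copyable, so route them through emitSerialized() instead of the
// generic placement-new copy above.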
template <>
void emit(Program &P, std::vector<std::byte> &Code, const Floating &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code,
          const IntegralAP<false> &Val, bool &Success) {
  emitSerialized(Code, Val, Success);
}

template <>
void emit(Program &P, std::vector<std::byte> &Code, const IntegralAP<true> &Val,
          bool &Success) {
  emitSerialized(Code, Val, Success);
}

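// Emit an opcode followed by its operands. The per-opcode emit wrappers
// included below under GET_LINK_IMPL all funnel through this function.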
template <typename... Tys>
bool ByteCodeEmitter::emitOp(Opcode Op, const Tys &... Args,
                             const SourceInfo &SI) {
  bool Success = true;

  // The opcode is followed by arguments. The source info is
  // attached to the address after the opcode.
  emit(P, Code, Op, Success);
  if (SI)
    SrcMap.emplace_back(Code.size(), SI);

  (..., emit(P, Code, Args, Success));
  return Success;
}

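// Jump helpers. The target label may not have been emitted yet; in that case
// getOffset() records a relocation which emitLabel() patches once the label's
// position is known.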
bool ByteCodeEmitter::jumpTrue(const LabelTy &Label) {
  return emitJt(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jumpFalse(const LabelTy &Label) {
  return emitJf(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::jump(const LabelTy &Label) {
  return emitJmp(getOffset(Label), SourceInfo{});
}

bool ByteCodeEmitter::fallthrough(const LabelTy &Label) {
  emitLabel(Label);
  return true;
}

//===----------------------------------------------------------------------===//
// Opcode emitters
//===----------------------------------------------------------------------===//

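// Pull in the out-of-line definitions of the per-opcode emit methods,
// generated by TableGen from Opcodes.td.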
#define GET_LINK_IMPL
#include "Opcodes.inc"
#undef GET_LINK_IMPL
347