1//===- Library.cpp - Library calls for llubi ------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements common libcalls for llubi.
10//
11//===----------------------------------------------------------------------===//
12
13#include "Library.h"
14#include "llvm/ADT/STLExtras.h"
15#include "llvm/Analysis/TargetLibraryInfo.h"
16#include "llvm/IR/InstrTypes.h"
17#include "llvm/Support/Format.h"
18#include "llvm/Support/raw_ostream.h"
19
20namespace llvm::ubi {
21
22static uint64_t getMaxAlign(const DataLayout &DL) {
23 // Return an alignment of 16 for 64-bit platforms, and 8 for 32-bit ones.
24 return DL.getPointerABIAlignment(AS: 0).value() >= 8 ? 16 : 8;
25}
26
27Library::Library(Context &Ctx, EventHandler &Handler, const DataLayout &DL,
28 ExecutorBase &Executor)
29 : Ctx(Ctx), Handler(Handler), DL(DL), Executor(Executor) {}
30
31std::optional<std::string> Library::readStringFromMemory(const Pointer &Ptr) {
32 std::string Result;
33 const APInt &Address = Ptr.address();
34 uint64_t Offset = 0;
35
36 while (true) {
37 auto [MO, ValidOffset] = Executor.verifyMemAccess(
38 Ptr: Ptr.getWithNewAddr(NewAddr: Address + Offset), AccessSize: 1, Alignment: Align(1), /*IsStore=*/false);
39 if (!MO)
40 return std::nullopt;
41
42 Byte B = (*MO)[ValidOffset];
43 if (B.ConcreteMask != 0xFF) {
44 Executor.reportImmediateUB()
45 << "Read uninitialized or poison memory while "
46 "parsing C-string at offset "
47 << Offset << ".";
48 return std::nullopt;
49 }
50
51 if (B.Value == 0)
52 break;
53
54 Result.push_back(c: static_cast<char>(B.Value));
55 ++Offset;
56 }
57
58 return Result;
59}
60
61AnyValue Library::executeMalloc(StringRef Name, Type *Type,
62 ArrayRef<AnyValue> Args,
63 MemAllocKind AllocKind) {
64 assert((AllocKind == MemAllocKind::Malloc || AllocKind == MemAllocKind::New ||
65 AllocKind == MemAllocKind::NewArray) &&
66 "Unexpected MemAllocKind for malloc()/new/new[]");
67
68 const auto &SizeVal = Args[0];
69
70 const uint64_t AllocSize = SizeVal.asInteger().getZExtValue();
71
72 const IntrusiveRefCntPtr<MemoryObject> Obj =
73 Ctx.allocate(Size: AllocSize, Align: getMaxAlign(DL), Name, AS: 0,
74 InitKind: MemInitKind::Uninitialized, AllocKind);
75
76 if (!Obj) {
77 if (AllocKind == MemAllocKind::New || AllocKind == MemAllocKind::NewArray) {
78 // FIXME: As llubi doesn't support stack unwinding yet, we report an error
79 // when new/new[] fails.
80 Executor.reportError() << "Insufficient heap space.";
81 return AnyValue::poison();
82 }
83 return AnyValue::getNullValue(Ctx, Ty: Type);
84 }
85
86 return Ctx.deriveFromMemoryObject(Obj);
87}
88
89AnyValue Library::executeCalloc(StringRef Name, Type *Type,
90 ArrayRef<AnyValue> Args,
91 MemAllocKind AllocKind) {
92 assert(AllocKind == MemAllocKind::Malloc &&
93 "Unexpected MemAllocKind for calloc()");
94
95 const auto &CountVal = Args[0];
96 const auto &SizeVal = Args[1];
97
98 const APInt &Count = CountVal.asInteger();
99 const APInt &Size = SizeVal.asInteger();
100
101 bool Overflow = false;
102 const APInt AllocSize = Count.umul_ov(RHS: Size, Overflow);
103 if (Overflow)
104 return AnyValue::getNullValue(Ctx, Ty: Type);
105
106 const IntrusiveRefCntPtr<MemoryObject> Obj =
107 Ctx.allocate(Size: AllocSize.getLimitedValue(), Align: getMaxAlign(DL), Name, AS: 0,
108 InitKind: MemInitKind::Zeroed, AllocKind);
109
110 if (!Obj)
111 return AnyValue::getNullValue(Ctx, Ty: Type);
112
113 return Ctx.deriveFromMemoryObject(Obj);
114}
115
116AnyValue Library::executeFree(ArrayRef<AnyValue> Args) {
117 const auto &PtrVal = Args[0];
118
119 auto &Ptr = PtrVal.asPointer();
120 // no-op when free is called with a null pointer.
121 if (Ptr.isNullPtr(/*AS=*/0, DL))
122 return AnyValue();
123
124 MemoryObject *Obj = Ctx.checkProvenance(Ptr, Check: [](const Provenance &) {
125 // TODO: check nofree
126 return true;
127 });
128 if (!Obj) {
129 Executor.reportImmediateUB()
130 << "freeing a pointer with nullary provenance.";
131 return AnyValue();
132 }
133
134 if (const uint64_t Address = Ptr.address().getZExtValue();
135 Address != Obj->getAddress()) {
136 Executor.reportImmediateUB()
137 << "freeing a pointer that does not point to "
138 "the start of an allocation. Pointer address: 0x"
139 << Twine::utohexstr(Val: Address) << ", allocation base: 0x"
140 << Twine::utohexstr(Val: Obj->getAddress()) << ".";
141 return AnyValue();
142 }
143
144 if (Obj->getState() == MemoryObjectState::Freed) {
145 Executor.reportImmediateUB()
146 << "double-freeing a memory object allocated at 0x"
147 << Twine::utohexstr(Val: Obj->getAddress()) << ".";
148 return AnyValue();
149 }
150
151 if (!Obj->isHeapAllocated()) {
152 Executor.reportImmediateUB() << "freeing a non-heap allocation at 0x"
153 << Twine::utohexstr(Val: Obj->getAddress()) << ".";
154 return AnyValue();
155 }
156
157 // Currently we don't check for cases where a memory allocated with C
158 // allocation family (malloc, calloc, etc.) is freed with a different free
159 // function comes from a different family (C++ delete, etc.)
160
161 if (!Ctx.free(Obj: *Obj)) {
162 Executor.reportImmediateUB()
163 << "freeing an invalid pointer at 0x"
164 << Twine::utohexstr(Val: Ptr.address().getZExtValue()) << ".";
165 return AnyValue::poison();
166 }
167
168 return AnyValue();
169}
170
171AnyValue Library::executePuts(ArrayRef<AnyValue> Args) {
172 const auto &PtrVal = Args[0];
173
174 const auto StrOpt = readStringFromMemory(Ptr: PtrVal.asPointer());
175 if (!StrOpt)
176 return AnyValue::poison();
177
178 Handler.onPrint(Msg: *StrOpt + "\n");
179 return AnyValue(APInt(Executor.getIntSize(), 1));
180}
181
182AnyValue Library::executePrintf(ArrayRef<AnyValue> Args) {
183 const auto &FormatPtrVal = Args[0];
184
185 const auto FormatStrOpt = readStringFromMemory(Ptr: FormatPtrVal.asPointer());
186 if (!FormatStrOpt)
187 return AnyValue::poison();
188
189 const std::string &FormatStr = *FormatStrOpt;
190 std::string Output;
191 raw_string_ostream OS(Output);
192 unsigned ArgIndex = 1; // Start from 1 since 0 is the format string.
193
194 for (unsigned I = 0; I < FormatStr.size();) {
195 if (FormatStr[I] != '%') {
196 OS << FormatStr[I++];
197 continue;
198 }
199
200 const size_t Start = I++;
201 if (I < FormatStr.size() && FormatStr[I] == '%') {
202 OS << '%';
203 ++I;
204 continue;
205 }
206
207 while (I < FormatStr.size() &&
208 StringRef("-= #0123456789").contains(C: FormatStr[I]))
209 ++I;
210
211 while (I < FormatStr.size() && StringRef("hljzt").contains(C: FormatStr[I]))
212 ++I;
213
214 if (I >= FormatStr.size()) {
215 Executor.reportImmediateUB()
216 << "Invalid format string in printf: missing conversion specifier.";
217 return AnyValue::poison();
218 }
219
220 char Specifier = FormatStr[I++];
221 std::string CleanChunk = FormatStr.substr(pos: Start, n: I - Start - 1);
222 CleanChunk.erase(
223 first: llvm::remove_if(Range&: CleanChunk,
224 P: [](char C) { return StringRef("hljzt").contains(C); }),
225 last: CleanChunk.end());
226
227 if (ArgIndex >= Args.size()) {
228 Executor.reportImmediateUB() << "Not enough arguments provided for the "
229 "format string. Required argument for '"
230 << Specifier << "'.";
231 return AnyValue::poison();
232 }
233
234 const auto &Arg = Args[ArgIndex++];
235 if (Arg.isPoison()) {
236 Executor.reportImmediateUB()
237 << "Poison argument passed to printf for format specifier '"
238 << Specifier << "' at argument index " << ArgIndex << ".";
239 return AnyValue::poison();
240 }
241
242 switch (Specifier) {
243 case 'd':
244 case 'i': {
245 std::string HostFmt = CleanChunk + "ll" + Specifier;
246 OS << format(Fmt: HostFmt.c_str(),
247 Vals: static_cast<long long>(Arg.asInteger().getSExtValue()));
248 break;
249 }
250 case 'u':
251 case 'o':
252 case 'x':
253 case 'X': {
254 // FIXME: The format specifiers "b" and "B" are not implemented here
255 // since currently MSVC doesn't support it.
256 std::string HostFmt = CleanChunk + "ll" + Specifier;
257 OS << format(Fmt: HostFmt.c_str(), Vals: static_cast<unsigned long long>(
258 Arg.asInteger().getZExtValue()));
259 break;
260 }
261 case 'c': {
262 std::string HostFmt = CleanChunk + Specifier;
263 OS << format(Fmt: HostFmt.c_str(),
264 Vals: static_cast<int>(Arg.asInteger().getZExtValue()));
265 break;
266 }
267 case 'f':
268 case 'e':
269 case 'E':
270 case 'g':
271 case 'G':
272 case 'a':
273 case 'A': {
274 std::string HostFmt = CleanChunk + Specifier;
275 OS << format(Fmt: HostFmt.c_str(), Vals: Arg.asFloat().convertToDouble());
276 break;
277 }
278 case 'n': {
279 OS.flush();
280 Executor.store(Ptr: Arg, Alignment: Align(4), Val: AnyValue(APInt(32, Output.size())),
281 ValTy: Type::getInt32Ty(C&: Ctx.getContext()));
282 break;
283 }
284 case 'p': {
285 std::string HostFmt = CleanChunk + "llx";
286 OS << "0x"
287 << format(Fmt: HostFmt.c_str(),
288 Vals: static_cast<unsigned long long>(
289 Arg.asPointer().address().getZExtValue()));
290 break;
291 }
292 case 's': {
293 auto StrOpt = readStringFromMemory(Ptr: Arg.asPointer());
294 if (!StrOpt)
295 return AnyValue::poison();
296 std::string HostFmt = CleanChunk + "s";
297 OS << format(Fmt: HostFmt.c_str(), Vals: StrOpt->c_str());
298 break;
299 }
300 default:
301 Executor.reportImmediateUB()
302 << "Unknown or unsupported format specifier '" << Specifier
303 << "' in printf.";
304 return AnyValue::poison();
305 }
306 }
307
308 OS.flush();
309 Handler.onPrint(Msg: Output);
310 return AnyValue(APInt(Executor.getIntSize(), Output.size()));
311}
312
313AnyValue Library::executeExit(ArrayRef<AnyValue> Args) {
314 const auto &RetCodeVal = Args[0];
315
316 Executor.requestProgramExit(Kind: ProgramExitInfo::ProgramExitKind::Exited,
317 ExitCode: RetCodeVal.asInteger().getZExtValue());
318 return AnyValue();
319}
320
321AnyValue Library::executeAbort() {
322 Executor.requestProgramExit(Kind: ProgramExitInfo::ProgramExitKind::Aborted);
323 return AnyValue();
324}
325
326AnyValue Library::executeTerminate() {
327 Executor.requestProgramExit(Kind: ProgramExitInfo::ProgramExitKind::Terminated);
328 return AnyValue();
329}
330
331std::optional<AnyValue> Library::executeLibcall(LibFunc LF, StringRef Name,
332 Type *Type,
333 ArrayRef<AnyValue> Args) {
334 unsigned Index = 0;
335 for (const AnyValue &Arg : Args) {
336 if (Arg.isPoison()) {
337 Executor.reportImmediateUB()
338 << "Poison argument passed to a library call at argument index "
339 << Index << ".";
340 return AnyValue::poison();
341 }
342 ++Index;
343 }
344
345 switch (LF) {
346 case LibFunc_malloc:
347 return executeMalloc(Name, Type, Args, AllocKind: MemAllocKind::Malloc);
348 case LibFunc_Znwm:
349 return executeMalloc(Name, Type, Args, AllocKind: MemAllocKind::New);
350 case LibFunc_Znam:
351 return executeMalloc(Name, Type, Args, AllocKind: MemAllocKind::NewArray);
352
353 case LibFunc_calloc:
354 return executeCalloc(Name, Type, Args, AllocKind: MemAllocKind::Malloc);
355
356 case LibFunc_free:
357 case LibFunc_ZdaPv:
358 case LibFunc_ZdlPv:
359 return executeFree(Args);
360
361 case LibFunc_puts:
362 return executePuts(Args);
363
364 case LibFunc_printf:
365 return executePrintf(Args);
366
367 case LibFunc_exit:
368 return executeExit(Args);
369
370 case LibFunc_abort:
371 return executeAbort();
372
373 case LibFunc_terminate:
374 return executeTerminate();
375
376 default:
377 return std::nullopt;
378 }
379}
380} // namespace llvm::ubi
381