1//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file defines the interfaces that BPF uses to lower LLVM code into a
10// selection DAG.
11//
12//===----------------------------------------------------------------------===//
13
14#include "BPFISelLowering.h"
15#include "BPF.h"
16#include "BPFSubtarget.h"
17#include "llvm/CodeGen/CallingConvLower.h"
18#include "llvm/CodeGen/MachineFrameInfo.h"
19#include "llvm/CodeGen/MachineFunction.h"
20#include "llvm/CodeGen/MachineInstrBuilder.h"
21#include "llvm/CodeGen/MachineJumpTableInfo.h"
22#include "llvm/CodeGen/MachineRegisterInfo.h"
23#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
24#include "llvm/CodeGen/ValueTypes.h"
25#include "llvm/IR/DIBuilder.h"
26#include "llvm/IR/DiagnosticInfo.h"
27#include "llvm/IR/DiagnosticPrinter.h"
28#include "llvm/IR/Module.h"
29#include "llvm/Support/Debug.h"
30#include "llvm/Support/ErrorHandling.h"
31#include "llvm/Support/MathExtras.h"
32#include "llvm/Support/raw_ostream.h"
33
34using namespace llvm;
35
36#define DEBUG_TYPE "bpf-lower"
37
38static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
39 cl::Hidden, cl::init(Val: false),
40 cl::desc("Expand memcpy into load/store pairs in order"));
41
42static cl::opt<unsigned> BPFMinimumJumpTableEntries(
43 "bpf-min-jump-table-entries", cl::init(Val: 13), cl::Hidden,
44 cl::desc("Set minimum number of entries to use a jump table on BPF"));
45
46static cl::opt<bool> BPFAllowsLibcalls(
47 "bpf-allows-libcalls", cl::Hidden, cl::init(Val: false),
48 cl::desc("Allow libcalls instead of rejecting unsupported built-in "
49 "functions"));
50
51static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
52 SDValue Val = {}) {
53 std::string Str;
54 if (Val) {
55 raw_string_ostream OS(Str);
56 Val->print(OS);
57 OS << ' ';
58 }
59 MachineFunction &MF = DAG.getMachineFunction();
60 DAG.getContext()->diagnose(DI: DiagnosticInfoUnsupported(
61 MF.getFunction(), Twine(Str).concat(Suffix: Msg), DL.getDebugLoc()));
62}
63
64BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
65 const BPFSubtarget &STI)
66 : TargetLowering(TM, STI) {
67
68 // Set up the register classes.
69 addRegisterClass(VT: MVT::i64, RC: &BPF::GPRRegClass);
70 if (STI.getHasAlu32())
71 addRegisterClass(VT: MVT::i32, RC: &BPF::GPR32RegClass);
72
73 // Compute derived properties from the register classes
74 computeRegisterProperties(TRI: STI.getRegisterInfo());
75
76 setStackPointerRegisterToSaveRestore(BPF::R11);
77
78 setOperationAction(Op: ISD::BR_CC, VT: MVT::i64, Action: Custom);
79 setOperationAction(Op: ISD::BR_JT, VT: MVT::Other, Action: Expand);
80 setOperationAction(Op: ISD::BRCOND, VT: MVT::Other, Action: Expand);
81
82 if (!STI.hasGotox())
83 setOperationAction(Op: ISD::BRIND, VT: MVT::Other, Action: Expand);
84
85 setOperationAction(Op: ISD::TRAP, VT: MVT::Other, Action: Custom);
86
87 setOperationAction(Ops: {ISD::GlobalAddress, ISD::ConstantPool}, VT: MVT::i64, Action: Custom);
88 if (STI.hasGotox())
89 setOperationAction(Ops: {ISD::JumpTable, ISD::BlockAddress}, VT: MVT::i64, Action: Custom);
90
91 setOperationAction(Op: ISD::DYNAMIC_STACKALLOC, VT: MVT::i64, Action: Custom);
92 setOperationAction(Op: ISD::STACKSAVE, VT: MVT::Other, Action: Expand);
93 setOperationAction(Op: ISD::STACKRESTORE, VT: MVT::Other, Action: Expand);
94
95 // Set unsupported atomic operations as Custom so
96 // we can emit better error messages than fatal error
97 // from selectiondag.
98 for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
99 if (VT == MVT::i32) {
100 if (STI.getHasAlu32())
101 continue;
102 } else {
103 setOperationAction(Op: ISD::ATOMIC_LOAD_ADD, VT, Action: Custom);
104 }
105
106 setOperationAction(Op: ISD::ATOMIC_LOAD_AND, VT, Action: Custom);
107 setOperationAction(Op: ISD::ATOMIC_LOAD_OR, VT, Action: Custom);
108 setOperationAction(Op: ISD::ATOMIC_LOAD_XOR, VT, Action: Custom);
109 setOperationAction(Op: ISD::ATOMIC_SWAP, VT, Action: Custom);
110 setOperationAction(Op: ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Action: Custom);
111 }
112
113 for (auto VT : {MVT::i32, MVT::i64}) {
114 setOperationAction(Op: ISD::ATOMIC_LOAD, VT, Action: Custom);
115 setOperationAction(Op: ISD::ATOMIC_STORE, VT, Action: Custom);
116 }
117
118 setOperationAction(Op: ISD::ATOMIC_FENCE, VT: MVT::Other, Action: Custom);
119
120 for (auto VT : { MVT::i32, MVT::i64 }) {
121 if (VT == MVT::i32 && !STI.getHasAlu32())
122 continue;
123
124 setOperationAction(Op: ISD::SDIVREM, VT, Action: Expand);
125 setOperationAction(Op: ISD::UDIVREM, VT, Action: Expand);
126 if (!STI.hasSdivSmod()) {
127 setOperationAction(Op: ISD::SDIV, VT, Action: Custom);
128 setOperationAction(Op: ISD::SREM, VT, Action: Custom);
129 }
130 setOperationAction(Op: ISD::MULHU, VT, Action: Expand);
131 setOperationAction(Op: ISD::MULHS, VT, Action: Expand);
132 setOperationAction(Op: ISD::UMUL_LOHI, VT, Action: Expand);
133 setOperationAction(Op: ISD::SMUL_LOHI, VT, Action: Expand);
134 setOperationAction(Op: ISD::ROTR, VT, Action: Expand);
135 setOperationAction(Op: ISD::ROTL, VT, Action: Expand);
136 setOperationAction(Op: ISD::SHL_PARTS, VT, Action: Custom);
137 setOperationAction(Op: ISD::SRL_PARTS, VT, Action: Custom);
138 setOperationAction(Op: ISD::SRA_PARTS, VT, Action: Custom);
139 setOperationAction(Op: ISD::CTPOP, VT, Action: Expand);
140 setOperationAction(Op: ISD::CTTZ, VT, Action: Expand);
141 setOperationAction(Op: ISD::CTLZ, VT, Action: Expand);
142 setOperationAction(Op: ISD::CTTZ_ZERO_POISON, VT, Action: Expand);
143 setOperationAction(Op: ISD::CTLZ_ZERO_POISON, VT, Action: Expand);
144
145 setOperationAction(Op: ISD::SETCC, VT, Action: Expand);
146 setOperationAction(Op: ISD::SELECT, VT, Action: Expand);
147 setOperationAction(Op: ISD::SELECT_CC, VT, Action: Custom);
148 }
149
150 if (STI.getHasAlu32()) {
151 setOperationAction(Op: ISD::BSWAP, VT: MVT::i32, Action: Promote);
152 setOperationAction(Op: ISD::BR_CC, VT: MVT::i32,
153 Action: STI.getHasJmp32() ? Custom : Promote);
154 }
155
156 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i1, Action: Expand);
157 if (!STI.hasMovsx()) {
158 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i8, Action: Expand);
159 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i16, Action: Expand);
160 setOperationAction(Op: ISD::SIGN_EXTEND_INREG, VT: MVT::i32, Action: Expand);
161 }
162
163 // Extended load operations for i1 types must be promoted
164 for (MVT VT : MVT::integer_valuetypes()) {
165 setLoadExtAction(ExtType: ISD::EXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
166 setLoadExtAction(ExtType: ISD::ZEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
167 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i1, Action: Promote);
168
169 if (!STI.hasLdsx()) {
170 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i8, Action: Expand);
171 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i16, Action: Expand);
172 setLoadExtAction(ExtType: ISD::SEXTLOAD, ValVT: VT, MemVT: MVT::i32, Action: Expand);
173 }
174 }
175
176 setBooleanContents(ZeroOrOneBooleanContent);
177 setMaxAtomicSizeInBitsSupported(64);
178 setMinimumJumpTableEntries(BPFMinimumJumpTableEntries);
179
180 // Function alignments
181 setMinFunctionAlignment(Align(8));
182 setPrefFunctionAlignment(Align(8));
183
184 if (BPFExpandMemcpyInOrder) {
185 // LLVM generic code will try to expand memcpy into load/store pairs at this
186 // stage which is before quite a few IR optimization passes, therefore the
187 // loads and stores could potentially be moved apart from each other which
188 // will cause trouble to memcpy pattern matcher inside kernel eBPF JIT
189 // compilers.
190 //
191 // When -bpf-expand-memcpy-in-order specified, we want to defer the expand
192 // of memcpy to later stage in IR optimization pipeline so those load/store
193 // pairs won't be touched and could be kept in order. Hence, we set
194 // MaxStoresPerMem* to zero to disable the generic getMemcpyLoadsAndStores
195 // code path, and ask LLVM to use target expander EmitTargetCodeForMemcpy.
196 MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
197 MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
198 MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
199 MaxLoadsPerMemcmp = 0;
200 } else {
201 // inline memcpy() for kernel to see explicit copy
202 unsigned CommonMaxStores =
203 STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();
204
205 MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
206 MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
207 MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
208 MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
209 }
210
211 // CPU/Feature control
212 HasAlu32 = STI.getHasAlu32();
213 HasJmp32 = STI.getHasJmp32();
214 HasJmpExt = STI.getHasJmpExt();
215 HasMovsx = STI.hasMovsx();
216
217 AllowsMisalignedMemAccess = STI.getAllowsMisalignedMemAccess();
218}
219
220bool BPFTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
221 MachineMemOperand::Flags,
222 unsigned *Fast) const {
223 // allows-misaligned-mem-access is disabled
224 if (!AllowsMisalignedMemAccess)
225 return false;
226
227 // only allow misalignment for simple value types
228 if (!VT.isSimple())
229 return false;
230
231 // always assume fast mode when misalignment is allowed
232 if (Fast)
233 *Fast = true;
234
235 return true;
236}
237
238bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
239 return false;
240}
241
242bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
243 if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
244 return false;
245 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
246 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
247 return NumBits1 > NumBits2;
248}
249
250bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
251 if (!VT1.isInteger() || !VT2.isInteger())
252 return false;
253 unsigned NumBits1 = VT1.getSizeInBits();
254 unsigned NumBits2 = VT2.getSizeInBits();
255 return NumBits1 > NumBits2;
256}
257
258bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
259 if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
260 return false;
261 unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
262 unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
263 return NumBits1 == 32 && NumBits2 == 64;
264}
265
266bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
267 if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
268 return false;
269 unsigned NumBits1 = VT1.getSizeInBits();
270 unsigned NumBits2 = VT2.getSizeInBits();
271 return NumBits1 == 32 && NumBits2 == 64;
272}
273
274bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
275 EVT VT1 = Val.getValueType();
276 if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
277 MVT MT1 = VT1.getSimpleVT().SimpleTy;
278 MVT MT2 = VT2.getSimpleVT().SimpleTy;
279 if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
280 (MT2 == MVT::i32 || MT2 == MVT::i64))
281 return true;
282 }
283 return TargetLoweringBase::isZExtFree(Val, VT2);
284}
285
286unsigned BPFTargetLowering::getJumpTableEncoding() const {
287 return MachineJumpTableInfo::EK_BlockAddress;
288}
289
290BPFTargetLowering::ConstraintType
291BPFTargetLowering::getConstraintType(StringRef Constraint) const {
292 if (Constraint.size() == 1) {
293 switch (Constraint[0]) {
294 default:
295 break;
296 case 'w':
297 return C_RegisterClass;
298 }
299 }
300
301 return TargetLowering::getConstraintType(Constraint);
302}
303
304std::pair<unsigned, const TargetRegisterClass *>
305BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
306 StringRef Constraint,
307 MVT VT) const {
308 if (Constraint.size() == 1) {
309 // GCC Constraint Letters
310 switch (Constraint[0]) {
311 case 'r': // GENERAL_REGS
312 return std::make_pair(x: 0U, y: &BPF::GPRRegClass);
313 case 'w':
314 if (HasAlu32)
315 return std::make_pair(x: 0U, y: &BPF::GPR32RegClass);
316 break;
317 default:
318 break;
319 }
320 }
321
322 return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
323}
324
325void BPFTargetLowering::ReplaceNodeResults(
326 SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
327 const char *Msg;
328 uint32_t Opcode = N->getOpcode();
329 switch (Opcode) {
330 default:
331 report_fatal_error(reason: "unhandled custom legalization: " + Twine(Opcode));
332 case ISD::ATOMIC_LOAD_ADD:
333 case ISD::ATOMIC_LOAD_AND:
334 case ISD::ATOMIC_LOAD_OR:
335 case ISD::ATOMIC_LOAD_XOR:
336 case ISD::ATOMIC_SWAP:
337 case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
338 if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
339 Msg = "unsupported atomic operation, please use 32/64 bit version";
340 else
341 Msg = "unsupported atomic operation, please use 64 bit version";
342 break;
343 case ISD::ATOMIC_LOAD:
344 case ISD::ATOMIC_STORE:
345 return;
346 }
347
348 SDLoc DL(N);
349 // We'll still produce a fatal error downstream, but this diagnostic is more
350 // user-friendly.
351 fail(DL, DAG, Msg);
352}
353
354SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
355 switch (Op.getOpcode()) {
356 default:
357 report_fatal_error(reason: "unimplemented opcode: " + Twine(Op.getOpcode()));
358 case ISD::BR_CC:
359 return LowerBR_CC(Op, DAG);
360 case ISD::JumpTable:
361 return LowerJumpTable(Op, DAG);
362 case ISD::GlobalAddress:
363 return LowerGlobalAddress(Op, DAG);
364 case ISD::ConstantPool:
365 return LowerConstantPool(Op, DAG);
366 case ISD::BlockAddress:
367 return LowerBlockAddress(Op, DAG);
368 case ISD::SELECT_CC:
369 return LowerSELECT_CC(Op, DAG);
370 case ISD::SDIV:
371 case ISD::SREM:
372 return LowerSDIVSREM(Op, DAG);
373 case ISD::SHL_PARTS:
374 case ISD::SRL_PARTS:
375 case ISD::SRA_PARTS:
376 return LowerShiftParts(Op, DAG);
377 case ISD::DYNAMIC_STACKALLOC:
378 return LowerDYNAMIC_STACKALLOC(Op, DAG);
379 case ISD::ATOMIC_LOAD:
380 case ISD::ATOMIC_STORE:
381 return LowerATOMIC_LOAD_STORE(Op, DAG);
382 case ISD::ATOMIC_FENCE:
383 return LowerATOMIC_FENCE(Op, DAG);
384 case ISD::TRAP:
385 return LowerTRAP(Op, DAG);
386 }
387}
388
389// Calling Convention Implementation
390#include "BPFGenCallingConv.inc"
391
392// Apply AssertSext/AssertZext and truncate based on VA's LocInfo.
393static SDValue convertLocValType(SelectionDAG &DAG, const SDLoc &DL,
394 const CCValAssign &VA, EVT RegVT,
395 SDValue ArgValue) {
396 if (VA.getLocInfo() == CCValAssign::SExt)
397 ArgValue = DAG.getNode(Opcode: ISD::AssertSext, DL, VT: RegVT, N1: ArgValue,
398 N2: DAG.getValueType(VA.getValVT()));
399 else if (VA.getLocInfo() == CCValAssign::ZExt)
400 ArgValue = DAG.getNode(Opcode: ISD::AssertZext, DL, VT: RegVT, N1: ArgValue,
401 N2: DAG.getValueType(VA.getValVT()));
402 if (VA.getLocInfo() != CCValAssign::Full)
403 ArgValue = DAG.getNode(Opcode: ISD::TRUNCATE, DL, VT: VA.getValVT(), Operand: ArgValue);
404 return ArgValue;
405}
406
407SDValue BPFTargetLowering::LowerFormalArguments(
408 SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
409 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
410 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
411 switch (CallConv) {
412 default:
413 report_fatal_error(reason: "unimplemented calling convention: " + Twine(CallConv));
414 case CallingConv::C:
415 case CallingConv::Fast:
416 break;
417 }
418
419 MachineFunction &MF = DAG.getMachineFunction();
420 MachineRegisterInfo &RegInfo = MF.getRegInfo();
421
422 // Assign locations to all of the incoming arguments.
423 SmallVector<CCValAssign, 16> ArgLocs;
424 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
425 CCInfo.AnalyzeFormalArguments(Ins, Fn: getHasAlu32() ? CC_BPF32 : CC_BPF64);
426
427 for (size_t I = 0; I < ArgLocs.size(); ++I) {
428 auto &VA = ArgLocs[I];
429 EVT RegVT = VA.getLocVT();
430
431 if (VA.isRegLoc()) {
432 // Arguments passed in registers
433 MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
434 switch (SimpleTy) {
435 default: {
436 std::string Str;
437 {
438 raw_string_ostream OS(Str);
439 RegVT.print(OS);
440 }
441 report_fatal_error(reason: "unhandled argument type: " + Twine(Str));
442 }
443 case MVT::i32:
444 case MVT::i64:
445 Register VReg = RegInfo.createVirtualRegister(
446 RegClass: SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
447 RegInfo.addLiveIn(Reg: VA.getLocReg(), vreg: VReg);
448 SDValue ArgValue = DAG.getCopyFromReg(Chain, dl: DL, Reg: VReg, VT: RegVT);
449 InVals.push_back(Elt: convertLocValType(DAG, DL, VA, RegVT, ArgValue));
450 break;
451 }
452 continue;
453 }
454
455 if (VA.isMemLoc()) {
456 // For example, two stack arguments,
457 // arg1: Off = 8
458 // arg2: off = 16
459 int Off = VA.getLocMemOffset() + 8;
460 if (Off > INT16_MAX) {
461 fail(DL, DAG, Msg: "extra parameter stack depth exceeded limit");
462 break;
463 }
464
465 // Physical extra argument slot is always 64-bit.
466 SDValue StackVal = DAG.getNode(Opcode: BPFISD::LOAD_STACK_ARG, DL,
467 VTList: DAG.getVTList(VT1: MVT::i64, VT2: MVT::Other), N1: Chain,
468 N2: DAG.getConstant(Val: Off, DL, VT: MVT::i64));
469 SDValue ArgValue = StackVal.getValue(R: 0);
470 Chain = StackVal.getValue(R: 1);
471 InVals.push_back(Elt: convertLocValType(DAG, DL, VA, RegVT: MVT::i64, ArgValue));
472 continue;
473 }
474 }
475
476 if (IsVarArg)
477 fail(DL, DAG, Msg: "variadic functions are not supported");
478 return Chain;
479}
480
481static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
482 MCRegister Reg) {
483 for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
484 RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
485}
486
487static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
488 MachineFunction &MF,
489 const uint32_t *BaseRegMask) {
490 uint32_t *RegMask = MF.allocateRegMask();
491 unsigned RegMaskSize = MachineOperand::getRegMaskSize(NumRegs: TRI->getNumRegs());
492 memcpy(dest: RegMask, src: BaseRegMask, n: sizeof(RegMask[0]) * RegMaskSize);
493 return RegMask;
494}
495
496SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
497 SmallVectorImpl<SDValue> &InVals) const {
498 SelectionDAG &DAG = CLI.DAG;
499 auto &Outs = CLI.Outs;
500 auto &OutVals = CLI.OutVals;
501 auto &Ins = CLI.Ins;
502 SDValue Chain = CLI.Chain;
503 SDValue Callee = CLI.Callee;
504 bool &IsTailCall = CLI.IsTailCall;
505 CallingConv::ID CallConv = CLI.CallConv;
506 bool IsVarArg = CLI.IsVarArg;
507 MachineFunction &MF = DAG.getMachineFunction();
508
509 // BPF target does not support tail call optimization.
510 IsTailCall = false;
511
512 switch (CallConv) {
513 default:
514 report_fatal_error(reason: "unsupported calling convention: " + Twine(CallConv));
515 case CallingConv::Fast:
516 case CallingConv::C:
517 break;
518 }
519
520 // Analyze operands of the call, assigning locations to each operand.
521 SmallVector<CCValAssign, 16> ArgLocs;
522 CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
523
524 CCInfo.AnalyzeCallOperands(Outs, Fn: getHasAlu32() ? CC_BPF32 : CC_BPF64);
525
526 unsigned NumBytes = CCInfo.getStackSize();
527
528 for (auto &Arg : Outs) {
529 ISD::ArgFlagsTy Flags = Arg.Flags;
530 if (!Flags.isByVal())
531 continue;
532 fail(DL: CLI.DL, DAG, Msg: "pass by value not supported", Val: Callee);
533 break;
534 }
535
536 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
537 Chain = DAG.getCALLSEQ_START(Chain, InSize: NumBytes, OutSize: 0, DL: CLI.DL);
538
539 SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
540
541 // Walk arg assignments
542 for (size_t i = 0; i < OutVals.size(); ++i) {
543 CCValAssign &VA = ArgLocs[i];
544 SDValue &Arg = OutVals[i];
545
546 // Promote the value if needed.
547 switch (VA.getLocInfo()) {
548 default:
549 report_fatal_error(reason: "unhandled location info: " + Twine(VA.getLocInfo()));
550 case CCValAssign::Full:
551 break;
552 case CCValAssign::SExt:
553 Arg = DAG.getNode(Opcode: ISD::SIGN_EXTEND, DL: CLI.DL, VT: VA.getLocVT(), Operand: Arg);
554 break;
555 case CCValAssign::ZExt:
556 Arg = DAG.getNode(Opcode: ISD::ZERO_EXTEND, DL: CLI.DL, VT: VA.getLocVT(), Operand: Arg);
557 break;
558 case CCValAssign::AExt:
559 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: CLI.DL, VT: VA.getLocVT(), Operand: Arg);
560 break;
561 }
562
563 // Push arguments into RegsToPass vector
564 if (VA.isRegLoc()) {
565 RegsToPass.push_back(Elt: std::make_pair(x: VA.getLocReg(), y&: Arg));
566 continue;
567 }
568
569 if (VA.isMemLoc()) {
570 int Off = -8 - VA.getLocMemOffset();
571 if (Off < INT16_MIN) {
572 fail(DL: CLI.DL, DAG, Msg: "extra parameter stack depth exceeded limit");
573 break;
574 }
575
576 // STORE_STACK_ARG requires i64 operands. With ALU32 mode, the CC
577 // promotion may only extend to i32, so extend to i64 if needed.
578 if (Arg.getValueType() != MVT::i64)
579 Arg = DAG.getNode(Opcode: ISD::ANY_EXTEND, DL: CLI.DL, VT: MVT::i64, Operand: Arg);
580
581 SDValue OffVal = DAG.getConstant(Val: Off, DL: CLI.DL, VT: MVT::i64);
582 Chain = DAG.getNode(Opcode: BPFISD::STORE_STACK_ARG, DL: CLI.DL, VT: MVT::Other, N1: Chain,
583 N2: OffVal, N3: Arg);
584 continue;
585 }
586
587 report_fatal_error(reason: "unhandled argument location");
588 }
589
590 SDValue InGlue;
591
592 // Build a sequence of copy-to-reg nodes chained together with token chain and
593 // flag operands which copy the outgoing args into registers. The InGlue in
594 // necessary since all emitted instructions must be stuck together.
595 for (auto &Reg : RegsToPass) {
596 Chain = DAG.getCopyToReg(Chain, dl: CLI.DL, Reg: Reg.first, N: Reg.second, Glue: InGlue);
597 InGlue = Chain.getValue(R: 1);
598 }
599
600 // If the callee is a GlobalAddress node (quite common, every direct call is)
601 // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
602 // Likewise ExternalSymbol -> TargetExternalSymbol.
603 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Val&: Callee)) {
604 Callee = DAG.getTargetGlobalAddress(GV: G->getGlobal(), DL: CLI.DL, VT: PtrVT,
605 offset: G->getOffset(), TargetFlags: 0);
606 } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Val&: Callee)) {
607 Callee = DAG.getTargetExternalSymbol(Sym: E->getSymbol(), VT: PtrVT, TargetFlags: 0);
608 StringRef Sym = E->getSymbol();
609 if (!BPFAllowsLibcalls && Sym != BPF_TRAP && Sym != "__multi3" &&
610 Sym != "__divti3" && Sym != "__modti3" && Sym != "__udivti3" &&
611 Sym != "__umodti3" && Sym != "memcpy" && Sym != "memset" &&
612 Sym != "memmove")
613 fail(
614 DL: CLI.DL, DAG,
615 Msg: Twine("A call to built-in function '" + Sym + "' is not supported."));
616 }
617
618 // Returns a chain & a flag for retval copy to use.
619 SDVTList NodeTys = DAG.getVTList(VT1: MVT::Other, VT2: MVT::Glue);
620 SmallVector<SDValue, 8> Ops;
621 Ops.push_back(Elt: Chain);
622 Ops.push_back(Elt: Callee);
623
624 // Add argument registers to the end of the list so that they are
625 // known live into the call.
626 for (auto &Reg : RegsToPass)
627 Ops.push_back(Elt: DAG.getRegister(Reg: Reg.first, VT: Reg.second.getValueType()));
628
629 bool HasFastCall =
630 (CLI.CB && isa<CallInst>(Val: CLI.CB) && CLI.CB->hasFnAttr(Kind: "bpf_fastcall"));
631 const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
632 if (HasFastCall) {
633 uint32_t *RegMask = regMaskFromTemplate(
634 TRI, MF, BaseRegMask: TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
635 for (auto const &RegPair : RegsToPass)
636 resetRegMaskBit(TRI, RegMask, Reg: RegPair.first);
637 if (!CLI.CB->getType()->isVoidTy())
638 resetRegMaskBit(TRI, RegMask, Reg: BPF::R0);
639 Ops.push_back(Elt: DAG.getRegisterMask(RegMask));
640 } else {
641 Ops.push_back(
642 Elt: DAG.getRegisterMask(RegMask: TRI->getCallPreservedMask(MF, CLI.CallConv)));
643 }
644
645 if (InGlue.getNode())
646 Ops.push_back(Elt: InGlue);
647
648 Chain = DAG.getNode(Opcode: BPFISD::CALL, DL: CLI.DL, VTList: NodeTys, Ops);
649 InGlue = Chain.getValue(R: 1);
650
651 DAG.addNoMergeSiteInfo(Node: Chain.getNode(), NoMerge: CLI.NoMerge);
652
653 // Create the CALLSEQ_END node.
654 Chain = DAG.getCALLSEQ_END(Chain, Size1: NumBytes, Size2: 0, Glue: InGlue, DL: CLI.DL);
655 InGlue = Chain.getValue(R: 1);
656
657 // Handle result values, copying them out of physregs into vregs that we
658 // return.
659 return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, DL: CLI.DL, DAG,
660 InVals);
661}
662
663SDValue
664BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
665 bool IsVarArg,
666 const SmallVectorImpl<ISD::OutputArg> &Outs,
667 const SmallVectorImpl<SDValue> &OutVals,
668 const SDLoc &DL, SelectionDAG &DAG) const {
669 unsigned Opc = BPFISD::RET_GLUE;
670
671 // CCValAssign - represent the assignment of the return value to a location
672 SmallVector<CCValAssign, 16> RVLocs;
673 MachineFunction &MF = DAG.getMachineFunction();
674
675 // CCState - Info about the registers and stack slot.
676 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
677
678 // Analize return values.
679 CCInfo.AnalyzeReturn(Outs, Fn: getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
680
681 SDValue Glue;
682 SmallVector<SDValue, 4> RetOps(1, Chain);
683
684 // Copy the result values into the output registers.
685 for (size_t i = 0; i != RVLocs.size(); ++i) {
686 CCValAssign &VA = RVLocs[i];
687 if (!VA.isRegLoc())
688 report_fatal_error(reason: "stack return values are not supported");
689
690 Chain = DAG.getCopyToReg(Chain, dl: DL, Reg: VA.getLocReg(), N: OutVals[i], Glue);
691
692 // Guarantee that all emitted copies are stuck together,
693 // avoiding something bad.
694 Glue = Chain.getValue(R: 1);
695 RetOps.push_back(Elt: DAG.getRegister(Reg: VA.getLocReg(), VT: VA.getLocVT()));
696 }
697
698 RetOps[0] = Chain; // Update chain.
699
700 // Add the glue if we have it.
701 if (Glue.getNode())
702 RetOps.push_back(Elt: Glue);
703
704 return DAG.getNode(Opcode: Opc, DL, VT: MVT::Other, Ops: RetOps);
705}
706
707SDValue BPFTargetLowering::LowerCallResult(
708 SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
709 const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
710 SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
711
712 MachineFunction &MF = DAG.getMachineFunction();
713 // Assign locations to each value returned by this call.
714 SmallVector<CCValAssign, 16> RVLocs;
715 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());
716
717 CCInfo.AnalyzeCallResult(Ins, Fn: getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
718
719 // Copy all of the result registers out of their specified physreg.
720 for (auto &Val : RVLocs) {
721 Chain = DAG.getCopyFromReg(Chain, dl: DL, Reg: Val.getLocReg(),
722 VT: Val.getValVT(), Glue: InGlue).getValue(R: 1);
723 InGlue = Chain.getValue(R: 2);
724 InVals.push_back(Elt: Chain.getValue(R: 0));
725 }
726
727 return Chain;
728}
729
730static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
731 switch (CC) {
732 default:
733 break;
734 case ISD::SETULT:
735 case ISD::SETULE:
736 case ISD::SETLT:
737 case ISD::SETLE:
738 CC = ISD::getSetCCSwappedOperands(Operation: CC);
739 std::swap(a&: LHS, b&: RHS);
740 break;
741 }
742}
743
744SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
745 SDLoc DL(Op);
746 fail(DL, DAG,
747 Msg: "unsupported signed division, please convert to unsigned div/mod.");
748 return DAG.getUNDEF(VT: Op->getValueType(ResNo: 0));
749}
750
751SDValue BPFTargetLowering::LowerShiftParts(SDValue Op,
752 SelectionDAG &DAG) const {
753 SDValue Lo, Hi;
754 expandShiftParts(N: Op.getNode(), Lo, Hi, DAG);
755 return DAG.getMergeValues(Ops: {Lo, Hi}, dl: SDLoc(Op));
756}
757
758SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
759 SelectionDAG &DAG) const {
760 SDLoc DL(Op);
761 fail(DL, DAG, Msg: "unsupported dynamic stack allocation");
762 auto Ops = {DAG.getConstant(Val: 0, DL: SDLoc(), VT: Op.getValueType()), Op.getOperand(i: 0)};
763 return DAG.getMergeValues(Ops, dl: SDLoc());
764}
765
766SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
767 SDValue Chain = Op.getOperand(i: 0);
768 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 1))->get();
769 SDValue LHS = Op.getOperand(i: 2);
770 SDValue RHS = Op.getOperand(i: 3);
771 SDValue Dest = Op.getOperand(i: 4);
772 SDLoc DL(Op);
773
774 if (!getHasJmpExt())
775 NegateCC(LHS, RHS, CC);
776
777 return DAG.getNode(Opcode: BPFISD::BR_CC, DL, VT: Op.getValueType(), N1: Chain, N2: LHS, N3: RHS,
778 N4: DAG.getConstant(Val: CC, DL, VT: LHS.getValueType()), N5: Dest);
779}
780
781SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
782 SDValue LHS = Op.getOperand(i: 0);
783 SDValue RHS = Op.getOperand(i: 1);
784 SDValue TrueV = Op.getOperand(i: 2);
785 SDValue FalseV = Op.getOperand(i: 3);
786 ISD::CondCode CC = cast<CondCodeSDNode>(Val: Op.getOperand(i: 4))->get();
787 SDLoc DL(Op);
788
789 if (!getHasJmpExt())
790 NegateCC(LHS, RHS, CC);
791
792 SDValue TargetCC = DAG.getConstant(Val: CC, DL, VT: LHS.getValueType());
793 SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};
794
795 return DAG.getNode(Opcode: BPFISD::SELECT_CC, DL, VT: Op.getValueType(), Ops);
796}
797
798SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
799 SelectionDAG &DAG) const {
800 SDNode *N = Op.getNode();
801 SDLoc DL(N);
802
803 if (cast<AtomicSDNode>(Val: N)->getMergedOrdering() ==
804 AtomicOrdering::SequentiallyConsistent)
805 fail(DL, DAG,
806 Msg: "sequentially consistent (seq_cst) "
807 "atomic load/store is not supported");
808
809 return Op;
810}
811
812SDValue BPFTargetLowering::LowerATOMIC_FENCE(SDValue Op,
813 SelectionDAG &DAG) const {
814 SDLoc DL(Op);
815 SyncScope::ID FenceSSID =
816 static_cast<SyncScope::ID>(Op.getConstantOperandVal(i: 2));
817
818 if (FenceSSID == SyncScope::SingleThread)
819 // MEMBARRIER is a compiler barrier; it codegens to a no-op.
820 return DAG.getNode(Opcode: ISD::MEMBARRIER, DL, VT: MVT::Other, Operand: Op.getOperand(i: 0));
821
822 report_fatal_error(reason: "Runtime fence is not supported at the moment");
823}
824
825static Function *createBPFUnreachable(Module *M) {
826 if (auto *Fn = M->getFunction(BPF_TRAP))
827 return Fn;
828
829 FunctionType *FT = FunctionType::get(Result: Type::getVoidTy(C&: M->getContext()), isVarArg: false);
830 Function *NewF =
831 Function::Create(Ty: FT, Linkage: GlobalValue::ExternalWeakLinkage, BPF_TRAP, M);
832 NewF->setDSOLocal(true);
833 NewF->setCallingConv(CallingConv::C);
834 NewF->setSection(".ksyms");
835
836 if (M->debug_compile_units().empty())
837 return NewF;
838
839 DIBuilder DBuilder(*M);
840 DITypeArray ParamTypes =
841 DBuilder.getOrCreateTypeArray(Elements: {nullptr /*void return*/});
842 DISubroutineType *FuncType = DBuilder.createSubroutineType(ParameterTypes: ParamTypes);
843 DICompileUnit *CU = *M->debug_compile_units_begin();
844 DISubprogram *SP =
845 DBuilder.createFunction(Scope: CU, BPF_TRAP, BPF_TRAP, File: nullptr, LineNo: 0, Ty: FuncType, ScopeLine: 0,
846 Flags: DINode::FlagZero, SPFlags: DISubprogram::SPFlagZero);
847 NewF->setSubprogram(SP);
848 return NewF;
849}
850
851SDValue BPFTargetLowering::LowerTRAP(SDValue Op, SelectionDAG &DAG) const {
852 MachineFunction &MF = DAG.getMachineFunction();
853 TargetLowering::CallLoweringInfo CLI(DAG);
854 SmallVector<SDValue> InVals;
855 SDNode *N = Op.getNode();
856 SDLoc DL(N);
857
858 Function *Fn = createBPFUnreachable(M: MF.getFunction().getParent());
859 auto PtrVT = getPointerTy(DL: MF.getDataLayout());
860 CLI.Callee = DAG.getTargetGlobalAddress(GV: Fn, DL, VT: PtrVT);
861 CLI.Chain = N->getOperand(Num: 0);
862 CLI.IsTailCall = false;
863 CLI.CallConv = CallingConv::C;
864 CLI.IsVarArg = false;
865 CLI.DL = std::move(DL);
866 CLI.NoMerge = false;
867 CLI.DoesNotReturn = true;
868 return LowerCall(CLI, InVals);
869}
870
871SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
872 JumpTableSDNode *N = cast<JumpTableSDNode>(Val&: Op);
873 return getAddr(N, DAG);
874}
875
876static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
877 SelectionDAG &DAG, unsigned Flags) {
878 return DAG.getTargetConstantPool(C: N->getConstVal(), VT: Ty, Align: N->getAlign(),
879 Offset: N->getOffset(), TargetFlags: Flags);
880}
881
882static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
883 SelectionDAG &DAG, unsigned Flags) {
884 return DAG.getTargetJumpTable(JTI: N->getIndex(), VT: Ty, TargetFlags: Flags);
885}
886
887template <class NodeTy>
888SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
889 unsigned Flags) const {
890 SDLoc DL(N);
891
892 SDValue GA = getTargetNode(N, DL, MVT::i64, DAG, Flags);
893
894 return DAG.getNode(Opcode: BPFISD::Wrapper, DL, VT: MVT::i64, Operand: GA);
895}
896
897SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
898 SelectionDAG &DAG) const {
899 GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Val&: Op);
900 if (N->getOffset() != 0)
901 report_fatal_error(reason: "invalid offset for global address: " +
902 Twine(N->getOffset()));
903
904 const GlobalValue *GVal = N->getGlobal();
905 SDLoc DL(Op);
906
907 // Wrap it in a TargetGlobalAddress
908 SDValue Addr = DAG.getTargetGlobalAddress(GV: GVal, DL, VT: MVT::i64);
909
910 // Emit pseudo instruction
911 return SDValue(DAG.getMachineNode(Opcode: BPF::LDIMM64, dl: DL, VT: MVT::i64, Op1: Addr), 0);
912}
913
914SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
915 SelectionDAG &DAG) const {
916 ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Val&: Op);
917
918 return getAddr(N, DAG);
919}
920
921SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
922 SelectionDAG &DAG) const {
923 const BlockAddress *BA = cast<BlockAddressSDNode>(Val&: Op)->getBlockAddress();
924 SDLoc DL(Op);
925
926 // Wrap it in a TargetBlockAddress
927 SDValue Addr = DAG.getTargetBlockAddress(BA, VT: MVT::i64);
928
929 // Emit pseudo instruction
930 return SDValue(DAG.getMachineNode(Opcode: BPF::LDIMM64, dl: DL, VT: MVT::i64, Op1: Addr), 0);
931}
932
933unsigned
934BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
935 unsigned Reg, bool isSigned) const {
936 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
937 const TargetRegisterClass *RC = getRegClassFor(VT: MVT::i64);
938 int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
939 MachineFunction *F = BB->getParent();
940 DebugLoc DL = MI.getDebugLoc();
941
942 MachineRegisterInfo &RegInfo = F->getRegInfo();
943
944 if (!isSigned) {
945 Register PromotedReg0 = RegInfo.createVirtualRegister(RegClass: RC);
946 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: BPF::MOV_32_64), DestReg: PromotedReg0).addReg(RegNo: Reg);
947 return PromotedReg0;
948 }
949 Register PromotedReg0 = RegInfo.createVirtualRegister(RegClass: RC);
950 Register PromotedReg1 = RegInfo.createVirtualRegister(RegClass: RC);
951 Register PromotedReg2 = RegInfo.createVirtualRegister(RegClass: RC);
952 if (HasMovsx) {
953 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: BPF::MOVSX_rr_32), DestReg: PromotedReg0).addReg(RegNo: Reg);
954 } else {
955 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: BPF::MOV_32_64), DestReg: PromotedReg0).addReg(RegNo: Reg);
956 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: BPF::SLL_ri), DestReg: PromotedReg1)
957 .addReg(RegNo: PromotedReg0).addImm(Val: 32);
958 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: RShiftOp), DestReg: PromotedReg2)
959 .addReg(RegNo: PromotedReg1).addImm(Val: 32);
960 }
961
962 return PromotedReg2;
963}
964
965MachineBasicBlock *
966BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
967 MachineBasicBlock *BB)
968 const {
969 MachineFunction *MF = MI.getParent()->getParent();
970 MachineRegisterInfo &MRI = MF->getRegInfo();
971 MachineInstrBuilder MIB(*MF, MI);
972 unsigned ScratchReg;
973
974 // This function does custom insertion during lowering BPFISD::MEMCPY which
975 // only has two register operands from memcpy semantics, the copy source
976 // address and the copy destination address.
977 //
978 // Because we will expand BPFISD::MEMCPY into load/store pairs, we will need
979 // a third scratch register to serve as the destination register of load and
980 // source register of store.
981 //
982 // The scratch register here is with the Define | Dead | EarlyClobber flags.
983 // The EarlyClobber flag has the semantic property that the operand it is
984 // attached to is clobbered before the rest of the inputs are read. Hence it
985 // must be unique among the operands to the instruction. The Define flag is
986 // needed to coerce the machine verifier that an Undef value isn't a problem
987 // as we anyway is loading memory into it. The Dead flag is needed as the
988 // value in scratch isn't supposed to be used by any other instruction.
989 ScratchReg = MRI.createVirtualRegister(RegClass: &BPF::GPRRegClass);
990 MIB.addReg(RegNo: ScratchReg,
991 Flags: RegState::Define | RegState::Dead | RegState::EarlyClobber);
992
993 return BB;
994}
995
996MachineBasicBlock *BPFTargetLowering::EmitInstrWithCustomInserterLDimm64(
997 MachineInstr &MI, MachineBasicBlock *BB) const {
998 MachineFunction *MF = BB->getParent();
999 const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
1000 const TargetRegisterClass *RC = getRegClassFor(VT: MVT::i64);
1001 MachineRegisterInfo &RegInfo = MF->getRegInfo();
1002 DebugLoc DL = MI.getDebugLoc();
1003
1004 // Build address taken map for Global Varaibles and BlockAddresses
1005 DenseMap<const BasicBlock *, MachineBasicBlock *> AddressTakenBBs;
1006 for (MachineBasicBlock &MBB : *MF) {
1007 if (const BasicBlock *BB = MBB.getBasicBlock())
1008 if (BB->hasAddressTaken())
1009 AddressTakenBBs[BB] = &MBB;
1010 }
1011
1012 MachineOperand &MO = MI.getOperand(i: 1);
1013 assert(MO.isBlockAddress() || MO.isGlobal());
1014
1015 Register ResultReg = MI.getOperand(i: 0).getReg();
1016 Register TmpReg = RegInfo.createVirtualRegister(RegClass: RC);
1017
1018 std::vector<MachineBasicBlock *> Targets;
1019 unsigned JTI;
1020
1021 if (MO.isBlockAddress()) {
1022 auto *BA = MO.getBlockAddress();
1023 MachineBasicBlock *TgtMBB = AddressTakenBBs[BA->getBasicBlock()];
1024 assert(TgtMBB);
1025
1026 Targets.push_back(x: TgtMBB);
1027 JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: getJumpTableEncoding())
1028 ->createJumpTableIndex(DestBBs: Targets);
1029
1030 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BPF::LD_imm64), DestReg: TmpReg)
1031 .addJumpTableIndex(Idx: JTI);
1032 BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BPF::LDD), DestReg: ResultReg)
1033 .addReg(RegNo: TmpReg)
1034 .addImm(Val: 0);
1035 MI.eraseFromParent();
1036 return BB;
1037 }
1038
1039 // Helper: emit LD_imm64 with operand GlobalAddress or JumpTable
1040 auto emitLDImm64 = [&](const GlobalValue *GV = nullptr, unsigned JTI = -1) {
1041 auto MIB = BuildMI(BB&: *BB, I&: MI, MIMD: DL, MCID: TII->get(Opcode: BPF::LD_imm64), DestReg: ResultReg);
1042 if (GV)
1043 MIB.addGlobalAddress(GV);
1044 else
1045 MIB.addJumpTableIndex(Idx: JTI);
1046 MI.eraseFromParent();
1047 return BB;
1048 };
1049
1050 // Must be a global at this point
1051 const GlobalValue *GVal = MO.getGlobal();
1052 const auto *GV = dyn_cast<GlobalVariable>(Val: GVal);
1053
1054 if (!GV || GV->getLinkage() != GlobalValue::PrivateLinkage ||
1055 !GV->isConstant() || !GV->hasInitializer())
1056 return emitLDImm64(GVal);
1057
1058 const auto *CA = dyn_cast<ConstantArray>(Val: GV->getInitializer());
1059 if (!CA)
1060 return emitLDImm64(GVal);
1061
1062 for (const Use &Op : CA->operands()) {
1063 if (!isa<BlockAddress>(Val: Op))
1064 return emitLDImm64(GVal);
1065 auto *BA = cast<BlockAddress>(Val: Op);
1066 MachineBasicBlock *TgtMBB = AddressTakenBBs[BA->getBasicBlock()];
1067 assert(TgtMBB);
1068 Targets.push_back(x: TgtMBB);
1069 }
1070
1071 JTI = MF->getOrCreateJumpTableInfo(JTEntryKind: getJumpTableEncoding())
1072 ->createJumpTableIndex(DestBBs: Targets);
1073 return emitLDImm64(nullptr, JTI);
1074}
1075
1076MachineBasicBlock *
1077BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
1078 MachineBasicBlock *BB) const {
1079 const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
1080 DebugLoc DL = MI.getDebugLoc();
1081 unsigned Opc = MI.getOpcode();
1082 bool isSelectRROp = (Opc == BPF::Select ||
1083 Opc == BPF::Select_64_32 ||
1084 Opc == BPF::Select_32 ||
1085 Opc == BPF::Select_32_64);
1086
1087 bool isMemcpyOp = Opc == BPF::MEMCPY;
1088 bool isLDimm64Op = Opc == BPF::LDIMM64;
1089
1090#ifndef NDEBUG
1091 bool isSelectRIOp = (Opc == BPF::Select_Ri ||
1092 Opc == BPF::Select_Ri_64_32 ||
1093 Opc == BPF::Select_Ri_32 ||
1094 Opc == BPF::Select_Ri_32_64);
1095
1096 if (!(isSelectRROp || isSelectRIOp || isMemcpyOp || isLDimm64Op))
1097 report_fatal_error("unhandled instruction type: " + Twine(Opc));
1098#endif
1099
1100 if (isMemcpyOp)
1101 return EmitInstrWithCustomInserterMemcpy(MI, BB);
1102
1103 if (isLDimm64Op)
1104 return EmitInstrWithCustomInserterLDimm64(MI, BB);
1105
1106 bool is32BitCmp = (Opc == BPF::Select_32 ||
1107 Opc == BPF::Select_32_64 ||
1108 Opc == BPF::Select_Ri_32 ||
1109 Opc == BPF::Select_Ri_32_64);
1110
1111 // To "insert" a SELECT instruction, we actually have to insert the diamond
1112 // control-flow pattern. The incoming instruction knows the destination vreg
1113 // to set, the condition code register to branch on, the true/false values to
1114 // select between, and a branch opcode to use.
1115 const BasicBlock *LLVM_BB = BB->getBasicBlock();
1116 MachineFunction::iterator I = ++BB->getIterator();
1117
1118 // ThisMBB:
1119 // ...
1120 // TrueVal = ...
1121 // jmp_XX r1, r2 goto Copy1MBB
1122 // fallthrough --> Copy0MBB
1123 MachineBasicBlock *ThisMBB = BB;
1124 MachineFunction *F = BB->getParent();
1125 MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
1126 MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(BB: LLVM_BB);
1127
1128 F->insert(MBBI: I, MBB: Copy0MBB);
1129 F->insert(MBBI: I, MBB: Copy1MBB);
1130 // Update machine-CFG edges by transferring all successors of the current
1131 // block to the new block which will contain the Phi node for the select.
1132 Copy1MBB->splice(Where: Copy1MBB->begin(), Other: BB,
1133 From: std::next(x: MachineBasicBlock::iterator(MI)), To: BB->end());
1134 Copy1MBB->transferSuccessorsAndUpdatePHIs(FromMBB: BB);
1135 // Next, add the true and fallthrough blocks as its successors.
1136 BB->addSuccessor(Succ: Copy0MBB);
1137 BB->addSuccessor(Succ: Copy1MBB);
1138
1139 // Insert Branch if Flag
1140 int CC = MI.getOperand(i: 3).getImm();
1141 int NewCC;
1142 switch (CC) {
1143#define SET_NEWCC(X, Y) \
1144 case ISD::X: \
1145 if (is32BitCmp && HasJmp32) \
1146 NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
1147 else \
1148 NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
1149 break
1150 SET_NEWCC(SETGT, JSGT);
1151 SET_NEWCC(SETUGT, JUGT);
1152 SET_NEWCC(SETGE, JSGE);
1153 SET_NEWCC(SETUGE, JUGE);
1154 SET_NEWCC(SETEQ, JEQ);
1155 SET_NEWCC(SETNE, JNE);
1156 SET_NEWCC(SETLT, JSLT);
1157 SET_NEWCC(SETULT, JULT);
1158 SET_NEWCC(SETLE, JSLE);
1159 SET_NEWCC(SETULE, JULE);
1160 default:
1161 report_fatal_error(reason: "unimplemented select CondCode " + Twine(CC));
1162 }
1163
1164 Register LHS = MI.getOperand(i: 1).getReg();
1165 bool isSignedCmp = (CC == ISD::SETGT ||
1166 CC == ISD::SETGE ||
1167 CC == ISD::SETLT ||
1168 CC == ISD::SETLE);
1169
1170 // eBPF at the moment only has 64-bit comparison. Any 32-bit comparison need
1171 // to be promoted, however if the 32-bit comparison operands are destination
1172 // registers then they are implicitly zero-extended already, there is no
1173 // need of explicit zero-extend sequence for them.
1174 //
1175 // We simply do extension for all situations in this method, but we will
1176 // try to remove those unnecessary in BPFMIPeephole pass.
1177 if (is32BitCmp && !HasJmp32)
1178 LHS = EmitSubregExt(MI, BB, Reg: LHS, isSigned: isSignedCmp);
1179
1180 if (isSelectRROp) {
1181 Register RHS = MI.getOperand(i: 2).getReg();
1182
1183 if (is32BitCmp && !HasJmp32)
1184 RHS = EmitSubregExt(MI, BB, Reg: RHS, isSigned: isSignedCmp);
1185
1186 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: NewCC)).addReg(RegNo: LHS).addReg(RegNo: RHS).addMBB(MBB: Copy1MBB);
1187 } else {
1188 int64_t imm32 = MI.getOperand(i: 2).getImm();
1189 // Check before we build J*_ri instruction.
1190 if (!isInt<32>(x: imm32))
1191 report_fatal_error(reason: "immediate overflows 32 bits: " + Twine(imm32));
1192 BuildMI(BB, MIMD: DL, MCID: TII.get(Opcode: NewCC))
1193 .addReg(RegNo: LHS).addImm(Val: imm32).addMBB(MBB: Copy1MBB);
1194 }
1195
1196 // Copy0MBB:
1197 // %FalseValue = ...
1198 // # fallthrough to Copy1MBB
1199 BB = Copy0MBB;
1200
1201 // Update machine-CFG edges
1202 BB->addSuccessor(Succ: Copy1MBB);
1203
1204 // Copy1MBB:
1205 // %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
1206 // ...
1207 BB = Copy1MBB;
1208 BuildMI(BB&: *BB, I: BB->begin(), MIMD: DL, MCID: TII.get(Opcode: BPF::PHI), DestReg: MI.getOperand(i: 0).getReg())
1209 .addReg(RegNo: MI.getOperand(i: 5).getReg())
1210 .addMBB(MBB: Copy0MBB)
1211 .addReg(RegNo: MI.getOperand(i: 4).getReg())
1212 .addMBB(MBB: ThisMBB);
1213
1214 MI.eraseFromParent(); // The pseudo instruction is gone now.
1215 return BB;
1216}
1217
1218EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
1219 EVT VT) const {
1220 return getHasAlu32() ? MVT::i32 : MVT::i64;
1221}
1222
1223MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
1224 EVT VT) const {
1225 return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
1226}
1227
1228bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
1229 const AddrMode &AM, Type *Ty,
1230 unsigned AS,
1231 Instruction *I) const {
1232 // No global is ever allowed as a base.
1233 if (AM.BaseGV)
1234 return false;
1235
1236 switch (AM.Scale) {
1237 case 0: // "r+i" or just "i", depending on HasBaseReg.
1238 break;
1239 case 1:
1240 if (!AM.HasBaseReg) // allow "r+i".
1241 break;
1242 return false; // disallow "r+r" or "r+r+i".
1243 default:
1244 return false;
1245 }
1246
1247 return true;
1248}
1249
1250bool BPFTargetLowering::CanLowerReturn(
1251 CallingConv::ID CallConv, MachineFunction &MF, bool IsVarArg,
1252 const SmallVectorImpl<ISD::OutputArg> &Outs, LLVMContext &Context,
1253 const Type *RetTy) const {
1254 SmallVector<CCValAssign, 16> RVLocs;
1255 CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, Context);
1256 return CCInfo.CheckReturn(Outs, Fn: getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);
1257}
1258