//===-- BPFISelLowering.cpp - BPF DAG Lowering Implementation ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file defines the interfaces that BPF uses to lower LLVM code into a
// selection DAG.
//
//===----------------------------------------------------------------------===//

#include "BPFISelLowering.h"
#include "BPF.h"
#include "BPFSubtarget.h"
#include "llvm/CodeGen/CallingConvLower.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/DIBuilder.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/DiagnosticPrinter.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

#define DEBUG_TYPE "bpf-lower"

static cl::opt<bool> BPFExpandMemcpyInOrder("bpf-expand-memcpy-in-order",
  cl::Hidden, cl::init(false),
  cl::desc("Expand memcpy into load/store pairs in order"));

static cl::opt<unsigned> BPFMinimumJumpTableEntries(
    "bpf-min-jump-table-entries", cl::init(13), cl::Hidden,
    cl::desc("Set minimum number of entries to use a jump table on BPF"));

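// Emit an "unsupported" diagnostic against the current function instead of
// aborting compilation outright; if a value is supplied, print it before the
// message for extra context.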
static void fail(const SDLoc &DL, SelectionDAG &DAG, const Twine &Msg,
                 SDValue Val = {}) {
  std::string Str;
  if (Val) {
    raw_string_ostream OS(Str);
    Val->print(OS);
    OS << ' ';
  }
  MachineFunction &MF = DAG.getMachineFunction();
  DAG.getContext()->diagnose(DiagnosticInfoUnsupported(
      MF.getFunction(), Twine(Str).concat(Msg), DL.getDebugLoc()));
}

BPFTargetLowering::BPFTargetLowering(const TargetMachine &TM,
                                     const BPFSubtarget &STI)
    : TargetLowering(TM, STI) {

  // Set up the register classes.
  addRegisterClass(MVT::i64, &BPF::GPRRegClass);
  if (STI.getHasAlu32())
    addRegisterClass(MVT::i32, &BPF::GPR32RegClass);

  // Compute derived properties from the register classes.
  computeRegisterProperties(STI.getRegisterInfo());

  setStackPointerRegisterToSaveRestore(BPF::R11);

  setOperationAction(ISD::BR_CC, MVT::i64, Custom);
  setOperationAction(ISD::BR_JT, MVT::Other, Expand);
  setOperationAction(ISD::BRCOND, MVT::Other, Expand);

  if (!STI.hasGotox())
    setOperationAction(ISD::BRIND, MVT::Other, Expand);

  setOperationAction(ISD::TRAP, MVT::Other, Custom);

  setOperationAction({ISD::GlobalAddress, ISD::ConstantPool}, MVT::i64, Custom);
  if (STI.hasGotox())
    setOperationAction({ISD::JumpTable, ISD::BlockAddress}, MVT::i64, Custom);

  setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i64, Custom);
  setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
  setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);

  // Mark unsupported atomic operations as Custom so we can emit better error
  // messages than the fatal errors coming from SelectionDAG.
  for (auto VT : {MVT::i8, MVT::i16, MVT::i32}) {
    if (VT == MVT::i32) {
      if (STI.getHasAlu32())
        continue;
    } else {
      setOperationAction(ISD::ATOMIC_LOAD_ADD, VT, Custom);
    }

    setOperationAction(ISD::ATOMIC_LOAD_AND, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_OR, VT, Custom);
    setOperationAction(ISD::ATOMIC_LOAD_XOR, VT, Custom);
    setOperationAction(ISD::ATOMIC_SWAP, VT, Custom);
    setOperationAction(ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS, VT, Custom);
  }

  for (auto VT : {MVT::i32, MVT::i64}) {
    setOperationAction(ISD::ATOMIC_LOAD, VT, Custom);
    setOperationAction(ISD::ATOMIC_STORE, VT, Custom);
  }

  for (auto VT : {MVT::i32, MVT::i64}) {
    if (VT == MVT::i32 && !STI.getHasAlu32())
      continue;

    setOperationAction(ISD::SDIVREM, VT, Expand);
    setOperationAction(ISD::UDIVREM, VT, Expand);
    if (!STI.hasSdivSmod()) {
      setOperationAction(ISD::SDIV, VT, Custom);
      setOperationAction(ISD::SREM, VT, Custom);
    }
    setOperationAction(ISD::MULHU, VT, Expand);
    setOperationAction(ISD::MULHS, VT, Expand);
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
    setOperationAction(ISD::ROTR, VT, Expand);
    setOperationAction(ISD::ROTL, VT, Expand);
    setOperationAction(ISD::SHL_PARTS, VT, Expand);
    setOperationAction(ISD::SRL_PARTS, VT, Expand);
    setOperationAction(ISD::SRA_PARTS, VT, Expand);
    setOperationAction(ISD::CTPOP, VT, Expand);
    setOperationAction(ISD::CTTZ, VT, Expand);
    setOperationAction(ISD::CTLZ, VT, Expand);
    setOperationAction(ISD::CTTZ_ZERO_UNDEF, VT, Expand);
    setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Expand);

    setOperationAction(ISD::SETCC, VT, Expand);
    setOperationAction(ISD::SELECT, VT, Expand);
    setOperationAction(ISD::SELECT_CC, VT, Custom);
  }

  if (STI.getHasAlu32()) {
    setOperationAction(ISD::BSWAP, MVT::i32, Promote);
    setOperationAction(ISD::BR_CC, MVT::i32,
                       STI.getHasJmp32() ? Custom : Promote);
  }

  setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
  if (!STI.hasMovsx()) {
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
    setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Expand);
  }

  // Extended load operations for i1 types must be promoted.
  for (MVT VT : MVT::integer_valuetypes()) {
    setLoadExtAction(ISD::EXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::ZEXTLOAD, VT, MVT::i1, Promote);
    setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i1, Promote);

    if (!STI.hasLdsx()) {
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i8, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i16, Expand);
      setLoadExtAction(ISD::SEXTLOAD, VT, MVT::i32, Expand);
    }
  }

  setBooleanContents(ZeroOrOneBooleanContent);
  setMaxAtomicSizeInBitsSupported(64);
  setMinimumJumpTableEntries(BPFMinimumJumpTableEntries);

  // Function alignments
  setMinFunctionAlignment(Align(8));
  setPrefFunctionAlignment(Align(8));

  if (BPFExpandMemcpyInOrder) {
    // LLVM's generic code will try to expand memcpy into load/store pairs at
    // this stage, which is before quite a few IR optimization passes; the
    // loads and stores could therefore be moved apart from each other, which
    // causes trouble for the memcpy pattern matcher inside kernel eBPF JIT
    // compilers.
    //
    // When -bpf-expand-memcpy-in-order is specified, we want to defer the
    // expansion of memcpy to a later stage in the IR optimization pipeline so
    // those load/store pairs won't be touched and can be kept in order. Hence,
    // we set MaxStoresPerMem* to zero to disable the generic
    // getMemcpyLoadsAndStores code path, and ask LLVM to use the target
    // expander EmitTargetCodeForMemcpy.
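    //
    // As a rough illustration (not tied to any particular source), for a small
    // constant-size copy such as
    //   __builtin_memcpy(dst, src, 16);
    // the intent is that the expansion stays a strictly ordered sequence of
    //   load r, [src+0];  store [dst+0], r;
    //   load r, [src+8];  store [dst+8], r;
    // rather than a schedule where the loads and stores end up interleaved
    // arbitrarily.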
    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = 0;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = 0;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = 0;
    MaxLoadsPerMemcmp = 0;
  } else {
    // Inline memcpy() so the kernel can see the explicit copy.
    unsigned CommonMaxStores =
        STI.getSelectionDAGInfo()->getCommonMaxStoresPerMemFunc();

    MaxStoresPerMemset = MaxStoresPerMemsetOptSize = CommonMaxStores;
    MaxStoresPerMemcpy = MaxStoresPerMemcpyOptSize = CommonMaxStores;
    MaxStoresPerMemmove = MaxStoresPerMemmoveOptSize = CommonMaxStores;
    MaxLoadsPerMemcmp = MaxLoadsPerMemcmpOptSize = CommonMaxStores;
  }

  // CPU/Feature control
  HasAlu32 = STI.getHasAlu32();
  HasJmp32 = STI.getHasJmp32();
  HasJmpExt = STI.getHasJmpExt();
  HasMovsx = STI.hasMovsx();

  AllowsMisalignedMemAccess = STI.getAllowsMisalignedMemAccess();
}

bool BPFTargetLowering::allowsMisalignedMemoryAccesses(EVT VT, unsigned, Align,
                                                       MachineMemOperand::Flags,
                                                       unsigned *Fast) const {
  // allows-misaligned-mem-access is disabled for this subtarget.
  if (!AllowsMisalignedMemAccess)
    return false;

  // Only allow misalignment for simple value types.
  if (!VT.isSimple())
    return false;

  // Always assume fast mode when misalignment is allowed.
  if (Fast)
    *Fast = true;

  return true;
}

bool BPFTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
  return false;
}

bool BPFTargetLowering::isTruncateFree(Type *Ty1, Type *Ty2) const {
  if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
  if (!VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 > NumBits2;
}

bool BPFTargetLowering::isZExtFree(Type *Ty1, Type *Ty2) const {
  if (!getHasAlu32() || !Ty1->isIntegerTy() || !Ty2->isIntegerTy())
    return false;
  unsigned NumBits1 = Ty1->getPrimitiveSizeInBits();
  unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

bool BPFTargetLowering::isZExtFree(EVT VT1, EVT VT2) const {
  if (!getHasAlu32() || !VT1.isInteger() || !VT2.isInteger())
    return false;
  unsigned NumBits1 = VT1.getSizeInBits();
  unsigned NumBits2 = VT2.getSizeInBits();
  return NumBits1 == 32 && NumBits2 == 64;
}

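// On BPF, narrow (8/16/32-bit) loads zero-extend into the full 64-bit
// destination register, so widening the result of such a load to i32 or i64
// is free.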
bool BPFTargetLowering::isZExtFree(SDValue Val, EVT VT2) const {
  EVT VT1 = Val.getValueType();
  if (Val.getOpcode() == ISD::LOAD && VT1.isSimple() && VT2.isSimple()) {
    MVT MT1 = VT1.getSimpleVT().SimpleTy;
    MVT MT2 = VT2.getSimpleVT().SimpleTy;
    if ((MT1 == MVT::i8 || MT1 == MVT::i16 || MT1 == MVT::i32) &&
        (MT2 == MVT::i32 || MT2 == MVT::i64))
      return true;
  }
  return TargetLoweringBase::isZExtFree(Val, VT2);
}

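// Jump table entries are emitted as plain block addresses (EK_BlockAddress);
// the LDIMM64 custom inserter below loads such entries directly when lowering
// indirect branches through a jump table.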
unsigned BPFTargetLowering::getJumpTableEncoding() const {
  return MachineJumpTableInfo::EK_BlockAddress;
}

BPFTargetLowering::ConstraintType
BPFTargetLowering::getConstraintType(StringRef Constraint) const {
  if (Constraint.size() == 1) {
    switch (Constraint[0]) {
    default:
      break;
    case 'w':
      return C_RegisterClass;
    }
  }

  return TargetLowering::getConstraintType(Constraint);
}

std::pair<unsigned, const TargetRegisterClass *>
BPFTargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI,
                                                StringRef Constraint,
                                                MVT VT) const {
  if (Constraint.size() == 1) {
    // GCC Constraint Letters
    switch (Constraint[0]) {
    case 'r': // GENERAL_REGS
      return std::make_pair(0U, &BPF::GPRRegClass);
    case 'w':
      if (HasAlu32)
        return std::make_pair(0U, &BPF::GPR32RegClass);
      break;
    default:
      break;
    }
  }

  return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT);
}

void BPFTargetLowering::ReplaceNodeResults(
    SDNode *N, SmallVectorImpl<SDValue> &Results, SelectionDAG &DAG) const {
  const char *Msg;
  uint32_t Opcode = N->getOpcode();
  switch (Opcode) {
  default:
    report_fatal_error("unhandled custom legalization: " + Twine(Opcode));
  case ISD::ATOMIC_LOAD_ADD:
  case ISD::ATOMIC_LOAD_AND:
  case ISD::ATOMIC_LOAD_OR:
  case ISD::ATOMIC_LOAD_XOR:
  case ISD::ATOMIC_SWAP:
  case ISD::ATOMIC_CMP_SWAP_WITH_SUCCESS:
    if (HasAlu32 || Opcode == ISD::ATOMIC_LOAD_ADD)
      Msg = "unsupported atomic operation, please use 32/64 bit version";
    else
      Msg = "unsupported atomic operation, please use 64 bit version";
    break;
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE:
    return;
  }

  SDLoc DL(N);
  // We'll still produce a fatal error downstream, but this diagnostic is more
  // user-friendly.
  fail(DL, DAG, Msg);
}

SDValue BPFTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
  switch (Op.getOpcode()) {
  default:
    report_fatal_error("unimplemented opcode: " + Twine(Op.getOpcode()));
  case ISD::BR_CC:
    return LowerBR_CC(Op, DAG);
  case ISD::JumpTable:
    return LowerJumpTable(Op, DAG);
  case ISD::GlobalAddress:
    return LowerGlobalAddress(Op, DAG);
  case ISD::ConstantPool:
    return LowerConstantPool(Op, DAG);
  case ISD::BlockAddress:
    return LowerBlockAddress(Op, DAG);
  case ISD::SELECT_CC:
    return LowerSELECT_CC(Op, DAG);
  case ISD::SDIV:
  case ISD::SREM:
    return LowerSDIVSREM(Op, DAG);
  case ISD::DYNAMIC_STACKALLOC:
    return LowerDYNAMIC_STACKALLOC(Op, DAG);
  case ISD::ATOMIC_LOAD:
  case ISD::ATOMIC_STORE:
    return LowerATOMIC_LOAD_STORE(Op, DAG);
  case ISD::TRAP:
    return LowerTRAP(Op, DAG);
  }
}

// Calling Convention Implementation
#include "BPFGenCallingConv.inc"

SDValue BPFTargetLowering::LowerFormalArguments(
    SDValue Chain, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
  switch (CallConv) {
  default:
    report_fatal_error("unimplemented calling convention: " + Twine(CallConv));
  case CallingConv::C:
  case CallingConv::Fast:
    break;
  }

  MachineFunction &MF = DAG.getMachineFunction();
  MachineRegisterInfo &RegInfo = MF.getRegInfo();

  // Assign locations to all of the incoming arguments.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());
  CCInfo.AnalyzeFormalArguments(Ins, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  bool HasMemArgs = false;
  for (size_t I = 0; I < ArgLocs.size(); ++I) {
    auto &VA = ArgLocs[I];

    if (VA.isRegLoc()) {
      // Arguments passed in registers.
      EVT RegVT = VA.getLocVT();
      MVT::SimpleValueType SimpleTy = RegVT.getSimpleVT().SimpleTy;
      switch (SimpleTy) {
      default: {
        std::string Str;
        {
          raw_string_ostream OS(Str);
          RegVT.print(OS);
        }
        report_fatal_error("unhandled argument type: " + Twine(Str));
      }
      case MVT::i32:
      case MVT::i64:
        Register VReg = RegInfo.createVirtualRegister(
            SimpleTy == MVT::i64 ? &BPF::GPRRegClass : &BPF::GPR32RegClass);
        RegInfo.addLiveIn(VA.getLocReg(), VReg);
        SDValue ArgValue = DAG.getCopyFromReg(Chain, DL, VReg, RegVT);

        // If this is a value that has been promoted to a wider type, insert an
        // assert[sz]ext to capture this, then truncate to the right size.
        if (VA.getLocInfo() == CCValAssign::SExt)
          ArgValue = DAG.getNode(ISD::AssertSext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));
        else if (VA.getLocInfo() == CCValAssign::ZExt)
          ArgValue = DAG.getNode(ISD::AssertZext, DL, RegVT, ArgValue,
                                 DAG.getValueType(VA.getValVT()));

        if (VA.getLocInfo() != CCValAssign::Full)
          ArgValue = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), ArgValue);

        InVals.push_back(ArgValue);

        break;
      }
    } else {
      if (VA.isMemLoc())
        HasMemArgs = true;
      else
        report_fatal_error("unhandled argument location");
      InVals.push_back(DAG.getConstant(0, DL, VA.getLocVT()));
    }
  }
  if (HasMemArgs)
    fail(DL, DAG, "stack arguments are not supported");
  if (IsVarArg)
    fail(DL, DAG, "variadic functions are not supported");
  if (MF.getFunction().hasStructRetAttr())
    fail(DL, DAG, "aggregate returns are not supported");

  return Chain;
}

const size_t BPFTargetLowering::MaxArgs = 5;

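// Helpers for calls marked with the "bpf_fastcall" attribute: copy a template
// register mask (e.g. the PreserveAll preserved-register mask) and then clear
// the bits for registers the call actually clobbers, i.e. the argument
// registers and, for non-void calls, R0.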
static void resetRegMaskBit(const TargetRegisterInfo *TRI, uint32_t *RegMask,
                            MCRegister Reg) {
  for (MCPhysReg SubReg : TRI->subregs_inclusive(Reg))
    RegMask[SubReg / 32] &= ~(1u << (SubReg % 32));
}

static uint32_t *regMaskFromTemplate(const TargetRegisterInfo *TRI,
                                     MachineFunction &MF,
                                     const uint32_t *BaseRegMask) {
  uint32_t *RegMask = MF.allocateRegMask();
  unsigned RegMaskSize = MachineOperand::getRegMaskSize(TRI->getNumRegs());
  memcpy(RegMask, BaseRegMask, sizeof(RegMask[0]) * RegMaskSize);
  return RegMask;
}

SDValue BPFTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
                                     SmallVectorImpl<SDValue> &InVals) const {
  SelectionDAG &DAG = CLI.DAG;
  auto &Outs = CLI.Outs;
  auto &OutVals = CLI.OutVals;
  auto &Ins = CLI.Ins;
  SDValue Chain = CLI.Chain;
  SDValue Callee = CLI.Callee;
  bool &IsTailCall = CLI.IsTailCall;
  CallingConv::ID CallConv = CLI.CallConv;
  bool IsVarArg = CLI.IsVarArg;
  MachineFunction &MF = DAG.getMachineFunction();

  // The BPF target does not support tail call optimization.
  IsTailCall = false;

  switch (CallConv) {
  default:
    report_fatal_error("unsupported calling convention: " + Twine(CallConv));
  case CallingConv::Fast:
  case CallingConv::C:
    break;
  }

  // Analyze operands of the call, assigning locations to each operand.
  SmallVector<CCValAssign, 16> ArgLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext());

  CCInfo.AnalyzeCallOperands(Outs, getHasAlu32() ? CC_BPF32 : CC_BPF64);

  unsigned NumBytes = CCInfo.getStackSize();

  if (Outs.size() > MaxArgs)
    fail(CLI.DL, DAG, "too many arguments", Callee);

  for (auto &Arg : Outs) {
    ISD::ArgFlagsTy Flags = Arg.Flags;
    if (!Flags.isByVal())
      continue;
    fail(CLI.DL, DAG, "pass by value not supported", Callee);
    break;
  }

  auto PtrVT = getPointerTy(MF.getDataLayout());
  Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, CLI.DL);

  SmallVector<std::pair<unsigned, SDValue>, MaxArgs> RegsToPass;

  // Walk arg assignments.
  for (size_t i = 0; i < std::min(ArgLocs.size(), MaxArgs); ++i) {
    CCValAssign &VA = ArgLocs[i];
    SDValue &Arg = OutVals[i];

    // Promote the value if needed.
    switch (VA.getLocInfo()) {
    default:
      report_fatal_error("unhandled location info: " + Twine(VA.getLocInfo()));
    case CCValAssign::Full:
      break;
    case CCValAssign::SExt:
      Arg = DAG.getNode(ISD::SIGN_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::ZExt:
      Arg = DAG.getNode(ISD::ZERO_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    case CCValAssign::AExt:
      Arg = DAG.getNode(ISD::ANY_EXTEND, CLI.DL, VA.getLocVT(), Arg);
      break;
    }

    // Push arguments into the RegsToPass vector.
    if (VA.isRegLoc())
      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
    else
      report_fatal_error("stack arguments are not supported");
  }

  SDValue InGlue;

  // Build a sequence of copy-to-reg nodes chained together with token chain
  // and flag operands which copy the outgoing args into registers. The InGlue
  // is necessary since all emitted instructions must be stuck together.
  for (auto &Reg : RegsToPass) {
    Chain = DAG.getCopyToReg(Chain, CLI.DL, Reg.first, Reg.second, InGlue);
    InGlue = Chain.getValue(1);
  }

  // If the callee is a GlobalAddress node (quite common, every direct call
  // is), turn it into a TargetGlobalAddress node so that legalize doesn't hack
  // it. Likewise ExternalSymbol -> TargetExternalSymbol.
  if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
    Callee = DAG.getTargetGlobalAddress(G->getGlobal(), CLI.DL, PtrVT,
                                        G->getOffset(), 0);
  } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(Callee)) {
    if (StringRef(E->getSymbol()) != BPF_TRAP) {
      Callee = DAG.getTargetExternalSymbol(E->getSymbol(), PtrVT, 0);
      fail(CLI.DL, DAG,
           Twine("A call to built-in function '" + StringRef(E->getSymbol()) +
                 "' is not supported."));
    }
  }

  // Returns a chain & a flag for retval copy to use.
  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
  SmallVector<SDValue, 8> Ops;
  Ops.push_back(Chain);
  Ops.push_back(Callee);

  // Add argument registers to the end of the list so that they are
  // known live into the call.
  for (auto &Reg : RegsToPass)
    Ops.push_back(DAG.getRegister(Reg.first, Reg.second.getValueType()));

  bool HasFastCall =
      (CLI.CB && isa<CallInst>(CLI.CB) && CLI.CB->hasFnAttr("bpf_fastcall"));
  const TargetRegisterInfo *TRI = MF.getSubtarget().getRegisterInfo();
  if (HasFastCall) {
    uint32_t *RegMask = regMaskFromTemplate(
        TRI, MF, TRI->getCallPreservedMask(MF, CallingConv::PreserveAll));
    for (auto const &RegPair : RegsToPass)
      resetRegMaskBit(TRI, RegMask, RegPair.first);
    if (!CLI.CB->getType()->isVoidTy())
      resetRegMaskBit(TRI, RegMask, BPF::R0);
    Ops.push_back(DAG.getRegisterMask(RegMask));
  } else {
    Ops.push_back(
        DAG.getRegisterMask(TRI->getCallPreservedMask(MF, CLI.CallConv)));
  }

  if (InGlue.getNode())
    Ops.push_back(InGlue);

  Chain = DAG.getNode(BPFISD::CALL, CLI.DL, NodeTys, Ops);
  InGlue = Chain.getValue(1);

  DAG.addNoMergeSiteInfo(Chain.getNode(), CLI.NoMerge);

  // Create the CALLSEQ_END node.
  Chain = DAG.getCALLSEQ_END(Chain, NumBytes, 0, InGlue, CLI.DL);
  InGlue = Chain.getValue(1);

  // Handle result values, copying them out of physregs into vregs that we
  // return.
  return LowerCallResult(Chain, InGlue, CallConv, IsVarArg, Ins, CLI.DL, DAG,
                         InVals);
}

SDValue
BPFTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv,
                               bool IsVarArg,
                               const SmallVectorImpl<ISD::OutputArg> &Outs,
                               const SmallVectorImpl<SDValue> &OutVals,
                               const SDLoc &DL, SelectionDAG &DAG) const {
  unsigned Opc = BPFISD::RET_GLUE;

  // CCValAssign - represents the assignment of the return value to a location.
  SmallVector<CCValAssign, 16> RVLocs;
  MachineFunction &MF = DAG.getMachineFunction();

  // CCState - Info about the registers and stack slot.
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (MF.getFunction().getReturnType()->isAggregateType()) {
    fail(DL, DAG, "aggregate returns are not supported");
    return DAG.getNode(Opc, DL, MVT::Other, Chain);
  }

  // Analyze return values.
  CCInfo.AnalyzeReturn(Outs, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  SDValue Glue;
  SmallVector<SDValue, 4> RetOps(1, Chain);

  // Copy the result values into the output registers.
  for (size_t i = 0; i != RVLocs.size(); ++i) {
    CCValAssign &VA = RVLocs[i];
    if (!VA.isRegLoc())
      report_fatal_error("stack return values are not supported");

    Chain = DAG.getCopyToReg(Chain, DL, VA.getLocReg(), OutVals[i], Glue);

    // Guarantee that all emitted copies are stuck together,
    // avoiding something bad.
    Glue = Chain.getValue(1);
    RetOps.push_back(DAG.getRegister(VA.getLocReg(), VA.getLocVT()));
  }

  RetOps[0] = Chain; // Update chain.

  // Add the glue if we have it.
  if (Glue.getNode())
    RetOps.push_back(Glue);

  return DAG.getNode(Opc, DL, MVT::Other, RetOps);
}

SDValue BPFTargetLowering::LowerCallResult(
    SDValue Chain, SDValue InGlue, CallingConv::ID CallConv, bool IsVarArg,
    const SmallVectorImpl<ISD::InputArg> &Ins, const SDLoc &DL,
    SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {

  MachineFunction &MF = DAG.getMachineFunction();
  // Assign locations to each value returned by this call.
  SmallVector<CCValAssign, 16> RVLocs;
  CCState CCInfo(CallConv, IsVarArg, MF, RVLocs, *DAG.getContext());

  if (Ins.size() > 1) {
    fail(DL, DAG, "only small returns supported");
    for (auto &In : Ins)
      InVals.push_back(DAG.getConstant(0, DL, In.VT));
    return DAG.getCopyFromReg(Chain, DL, 1, Ins[0].VT, InGlue).getValue(1);
  }

  CCInfo.AnalyzeCallResult(Ins, getHasAlu32() ? RetCC_BPF32 : RetCC_BPF64);

  // Copy all of the result registers out of their specified physreg.
  for (auto &Val : RVLocs) {
    Chain = DAG.getCopyFromReg(Chain, DL, Val.getLocReg(),
                               Val.getValVT(), InGlue).getValue(1);
    InGlue = Chain.getValue(2);
    InVals.push_back(Chain.getValue(0));
  }

  return Chain;
}

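// Without the jump-extension feature (getHasJmpExt()), BPF has no "less than"
// family of conditional jumps (JLT/JLE/JSLT/JSLE), so rewrite such comparisons
// by swapping the operands and using the mirrored condition code instead.
// Despite the name, this swaps the operands rather than negating the
// condition.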
static void NegateCC(SDValue &LHS, SDValue &RHS, ISD::CondCode &CC) {
  switch (CC) {
  default:
    break;
  case ISD::SETULT:
  case ISD::SETULE:
  case ISD::SETLT:
  case ISD::SETLE:
    CC = ISD::getSetCCSwappedOperands(CC);
    std::swap(LHS, RHS);
    break;
  }
}

SDValue BPFTargetLowering::LowerSDIVSREM(SDValue Op, SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG,
       "unsupported signed division, please convert to unsigned div/mod.");
  return DAG.getUNDEF(Op->getValueType(0));
}

SDValue BPFTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op,
                                                   SelectionDAG &DAG) const {
  SDLoc DL(Op);
  fail(DL, DAG, "unsupported dynamic stack allocation");
  auto Ops = {DAG.getConstant(0, SDLoc(), Op.getValueType()), Op.getOperand(0)};
  return DAG.getMergeValues(Ops, SDLoc());
}

SDValue BPFTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue Chain = Op.getOperand(0);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
  SDValue LHS = Op.getOperand(2);
  SDValue RHS = Op.getOperand(3);
  SDValue Dest = Op.getOperand(4);
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  return DAG.getNode(BPFISD::BR_CC, DL, Op.getValueType(), Chain, LHS, RHS,
                     DAG.getConstant(CC, DL, LHS.getValueType()), Dest);
}

SDValue BPFTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const {
  SDValue LHS = Op.getOperand(0);
  SDValue RHS = Op.getOperand(1);
  SDValue TrueV = Op.getOperand(2);
  SDValue FalseV = Op.getOperand(3);
  ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(4))->get();
  SDLoc DL(Op);

  if (!getHasJmpExt())
    NegateCC(LHS, RHS, CC);

  SDValue TargetCC = DAG.getConstant(CC, DL, LHS.getValueType());
  SDValue Ops[] = {LHS, RHS, TargetCC, TrueV, FalseV};

  return DAG.getNode(BPFISD::SELECT_CC, DL, Op.getValueType(), Ops);
}

SDValue BPFTargetLowering::LowerATOMIC_LOAD_STORE(SDValue Op,
                                                  SelectionDAG &DAG) const {
  SDNode *N = Op.getNode();
  SDLoc DL(N);

  if (cast<AtomicSDNode>(N)->getMergedOrdering() ==
      AtomicOrdering::SequentiallyConsistent)
    fail(DL, DAG,
         "sequentially consistent (seq_cst) "
         "atomic load/store is not supported");

  return Op;
}

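// Get or create the helper declaration used when lowering ISD::TRAP: an
// extern weak, void function named by BPF_TRAP, marked dso_local and placed
// in the ".ksyms" section, with a minimal DISubprogram attached when the
// module carries debug info.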
static Function *createBPFUnreachable(Module *M) {
  if (auto *Fn = M->getFunction(BPF_TRAP))
    return Fn;

  FunctionType *FT = FunctionType::get(Type::getVoidTy(M->getContext()), false);
  Function *NewF =
      Function::Create(FT, GlobalValue::ExternalWeakLinkage, BPF_TRAP, M);
  NewF->setDSOLocal(true);
  NewF->setCallingConv(CallingConv::C);
  NewF->setSection(".ksyms");

  if (M->debug_compile_units().empty())
    return NewF;

  DIBuilder DBuilder(*M);
  DITypeArray ParamTypes =
      DBuilder.getOrCreateTypeArray({nullptr /*void return*/});
  DISubroutineType *FuncType = DBuilder.createSubroutineType(ParamTypes);
  DICompileUnit *CU = *M->debug_compile_units_begin();
  DISubprogram *SP =
      DBuilder.createFunction(CU, BPF_TRAP, BPF_TRAP, nullptr, 0, FuncType, 0,
                              DINode::FlagZero, DISubprogram::SPFlagZero);
  NewF->setSubprogram(SP);
  return NewF;
}

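// Lower ISD::TRAP into a call to the BPF_TRAP helper declared by
// createBPFUnreachable(), reusing the regular LowerCall path for the actual
// call emission.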
SDValue BPFTargetLowering::LowerTRAP(SDValue Op, SelectionDAG &DAG) const {
  MachineFunction &MF = DAG.getMachineFunction();
  TargetLowering::CallLoweringInfo CLI(DAG);
  SmallVector<SDValue> InVals;
  SDNode *N = Op.getNode();
  SDLoc DL(N);

  Function *Fn = createBPFUnreachable(MF.getFunction().getParent());
  auto PtrVT = getPointerTy(MF.getDataLayout());
  CLI.Callee = DAG.getTargetGlobalAddress(Fn, DL, PtrVT);
  CLI.Chain = N->getOperand(0);
  CLI.IsTailCall = false;
  CLI.CallConv = CallingConv::C;
  CLI.IsVarArg = false;
  CLI.DL = DL;
  CLI.NoMerge = false;
  CLI.DoesNotReturn = true;
  return LowerCall(CLI, InVals);
}

SDValue BPFTargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const {
  JumpTableSDNode *N = cast<JumpTableSDNode>(Op);
  return getAddr(N, DAG);
}

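// getTargetNode/getAddr turn a ConstantPool or JumpTable node into its
// target-specific form and wrap it in a BPFISD::Wrapper so that instruction
// selection can materialize the address as a 64-bit immediate load.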
static SDValue getTargetNode(ConstantPoolSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetConstantPool(N->getConstVal(), Ty, N->getAlign(),
                                   N->getOffset(), Flags);
}

static SDValue getTargetNode(JumpTableSDNode *N, const SDLoc &DL, EVT Ty,
                             SelectionDAG &DAG, unsigned Flags) {
  return DAG.getTargetJumpTable(N->getIndex(), Ty, Flags);
}

template <class NodeTy>
SDValue BPFTargetLowering::getAddr(NodeTy *N, SelectionDAG &DAG,
                                   unsigned Flags) const {
  SDLoc DL(N);

  SDValue GA = getTargetNode(N, DL, MVT::i64, DAG, Flags);

  return DAG.getNode(BPFISD::Wrapper, DL, MVT::i64, GA);
}

SDValue BPFTargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
  GlobalAddressSDNode *N = cast<GlobalAddressSDNode>(Op);
  if (N->getOffset() != 0)
    report_fatal_error("invalid offset for global address: " +
                       Twine(N->getOffset()));

  const GlobalValue *GVal = N->getGlobal();
  SDLoc DL(Op);

  // Wrap it in a TargetGlobalAddress.
  SDValue Addr = DAG.getTargetGlobalAddress(GVal, DL, MVT::i64);

  // Emit the pseudo instruction.
  return SDValue(DAG.getMachineNode(BPF::LDIMM64, DL, MVT::i64, Addr), 0);
}

SDValue BPFTargetLowering::LowerConstantPool(SDValue Op,
                                             SelectionDAG &DAG) const {
  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);

  return getAddr(N, DAG);
}

SDValue BPFTargetLowering::LowerBlockAddress(SDValue Op,
                                             SelectionDAG &DAG) const {
  const BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
  SDLoc DL(Op);

  // Wrap it in a TargetBlockAddress.
  SDValue Addr = DAG.getTargetBlockAddress(BA, MVT::i64);

  // Emit the pseudo instruction.
  return SDValue(DAG.getMachineNode(BPF::LDIMM64, DL, MVT::i64, Addr), 0);
}

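// Promote the 32-bit value in Reg into a fresh 64-bit register. For zero
// extension a plain 32-to-64 move suffices (BPF 32-bit moves clear the upper
// bits); for sign extension use MOVSX when available, otherwise a shift-left
// by 32 followed by an arithmetic shift-right by 32.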
unsigned
BPFTargetLowering::EmitSubregExt(MachineInstr &MI, MachineBasicBlock *BB,
                                 unsigned Reg, bool isSigned) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  int RShiftOp = isSigned ? BPF::SRA_ri : BPF::SRL_ri;
  MachineFunction *F = BB->getParent();
  DebugLoc DL = MI.getDebugLoc();

  MachineRegisterInfo &RegInfo = F->getRegInfo();

  if (!isSigned) {
    Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    return PromotedReg0;
  }
  Register PromotedReg0 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg1 = RegInfo.createVirtualRegister(RC);
  Register PromotedReg2 = RegInfo.createVirtualRegister(RC);
  if (HasMovsx) {
    // MOVSX must define the register we return below.
    BuildMI(BB, DL, TII.get(BPF::MOVSX_rr_32), PromotedReg2).addReg(Reg);
  } else {
    BuildMI(BB, DL, TII.get(BPF::MOV_32_64), PromotedReg0).addReg(Reg);
    BuildMI(BB, DL, TII.get(BPF::SLL_ri), PromotedReg1)
        .addReg(PromotedReg0).addImm(32);
    BuildMI(BB, DL, TII.get(RShiftOp), PromotedReg2)
        .addReg(PromotedReg1).addImm(32);
  }

  return PromotedReg2;
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserterMemcpy(MachineInstr &MI,
                                                     MachineBasicBlock *BB)
                                                     const {
  MachineFunction *MF = MI.getParent()->getParent();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  MachineInstrBuilder MIB(*MF, MI);
  unsigned ScratchReg;

  // This function does custom insertion while lowering BPFISD::MEMCPY, which
  // only has two register operands from the memcpy semantics: the copy source
  // address and the copy destination address.
  //
  // Because we will expand BPFISD::MEMCPY into load/store pairs, we need a
  // third scratch register to serve as the destination register of the load
  // and the source register of the store.
  //
  // The scratch register here carries the Define | Dead | EarlyClobber flags.
  // The EarlyClobber flag has the semantic property that the operand it is
  // attached to is clobbered before the rest of the inputs are read. Hence it
  // must be unique among the operands to the instruction. The Define flag is
  // needed to convince the machine verifier that an Undef value isn't a
  // problem, as we are loading memory into it anyway. The Dead flag is needed
  // because the value in the scratch register isn't supposed to be used by
  // any other instruction.
  ScratchReg = MRI.createVirtualRegister(&BPF::GPRRegClass);
  MIB.addReg(ScratchReg,
             RegState::Define | RegState::Dead | RegState::EarlyClobber);

  return BB;
}

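// Custom-insert the LDIMM64 pseudo. A lone block address is turned into a
// one-entry jump table whose slot is loaded back with LDD, so the result is
// the final basic-block address. A private constant array made up entirely of
// block addresses (as typically produced for computed-goto label tables) is
// converted into a machine jump table and the pseudo becomes an LD_imm64 of
// that table's address; any other global is simply rematerialized as an
// LD_imm64 of the global.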
MachineBasicBlock *BPFTargetLowering::EmitInstrWithCustomInserterLDimm64(
    MachineInstr &MI, MachineBasicBlock *BB) const {
  MachineFunction *MF = BB->getParent();
  const BPFInstrInfo *TII = MF->getSubtarget<BPFSubtarget>().getInstrInfo();
  const TargetRegisterClass *RC = getRegClassFor(MVT::i64);
  MachineRegisterInfo &RegInfo = MF->getRegInfo();
  DebugLoc DL = MI.getDebugLoc();

  // Build the address-taken map used for global variables and block addresses.
  DenseMap<const BasicBlock *, MachineBasicBlock *> AddressTakenBBs;
  for (MachineBasicBlock &MBB : *MF) {
    if (const BasicBlock *BB = MBB.getBasicBlock())
      if (BB->hasAddressTaken())
        AddressTakenBBs[BB] = &MBB;
  }

  MachineOperand &MO = MI.getOperand(1);
  assert(MO.isBlockAddress() || MO.isGlobal());

  Register ResultReg = MI.getOperand(0).getReg();
  Register TmpReg = RegInfo.createVirtualRegister(RC);

  std::vector<MachineBasicBlock *> Targets;
  unsigned JTI;

  if (MO.isBlockAddress()) {
    auto *BA = MO.getBlockAddress();
    MachineBasicBlock *TgtMBB = AddressTakenBBs[BA->getBasicBlock()];
    assert(TgtMBB);

    Targets.push_back(TgtMBB);
    JTI = MF->getOrCreateJumpTableInfo(getJumpTableEncoding())
              ->createJumpTableIndex(Targets);

    BuildMI(*BB, MI, DL, TII->get(BPF::LD_imm64), TmpReg)
        .addJumpTableIndex(JTI);
    BuildMI(*BB, MI, DL, TII->get(BPF::LDD), ResultReg)
        .addReg(TmpReg)
        .addImm(0);
    MI.eraseFromParent();
    return BB;
  }

  // Helper: emit LD_imm64 with a GlobalAddress or JumpTable operand.
  auto emitLDImm64 = [&](const GlobalValue *GV = nullptr, unsigned JTI = -1) {
    auto MIB = BuildMI(*BB, MI, DL, TII->get(BPF::LD_imm64), ResultReg);
    if (GV)
      MIB.addGlobalAddress(GV);
    else
      MIB.addJumpTableIndex(JTI);
    MI.eraseFromParent();
    return BB;
  };

  // Must be a global at this point.
  const GlobalValue *GVal = MO.getGlobal();
  const auto *GV = dyn_cast<GlobalVariable>(GVal);

  if (!GV || GV->getLinkage() != GlobalValue::PrivateLinkage ||
      !GV->isConstant() || !GV->hasInitializer())
    return emitLDImm64(GVal);

  const auto *CA = dyn_cast<ConstantArray>(GV->getInitializer());
  if (!CA)
    return emitLDImm64(GVal);

  for (const Use &Op : CA->operands()) {
    if (!isa<BlockAddress>(Op))
      return emitLDImm64(GVal);
    auto *BA = cast<BlockAddress>(Op);
    MachineBasicBlock *TgtMBB = AddressTakenBBs[BA->getBasicBlock()];
    assert(TgtMBB);
    Targets.push_back(TgtMBB);
  }

  JTI = MF->getOrCreateJumpTableInfo(getJumpTableEncoding())
            ->createJumpTableIndex(Targets);
  return emitLDImm64(nullptr, JTI);
}

MachineBasicBlock *
BPFTargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
                                               MachineBasicBlock *BB) const {
  const TargetInstrInfo &TII = *BB->getParent()->getSubtarget().getInstrInfo();
  DebugLoc DL = MI.getDebugLoc();
  unsigned Opc = MI.getOpcode();
  bool isSelectRROp = (Opc == BPF::Select ||
                       Opc == BPF::Select_64_32 ||
                       Opc == BPF::Select_32 ||
                       Opc == BPF::Select_32_64);

  bool isMemcpyOp = Opc == BPF::MEMCPY;
  bool isLDimm64Op = Opc == BPF::LDIMM64;

#ifndef NDEBUG
  bool isSelectRIOp = (Opc == BPF::Select_Ri ||
                       Opc == BPF::Select_Ri_64_32 ||
                       Opc == BPF::Select_Ri_32 ||
                       Opc == BPF::Select_Ri_32_64);

  if (!(isSelectRROp || isSelectRIOp || isMemcpyOp || isLDimm64Op))
    report_fatal_error("unhandled instruction type: " + Twine(Opc));
#endif

  if (isMemcpyOp)
    return EmitInstrWithCustomInserterMemcpy(MI, BB);

  if (isLDimm64Op)
    return EmitInstrWithCustomInserterLDimm64(MI, BB);

  bool is32BitCmp = (Opc == BPF::Select_32 ||
                     Opc == BPF::Select_32_64 ||
                     Opc == BPF::Select_Ri_32 ||
                     Opc == BPF::Select_Ri_32_64);

  // To "insert" a SELECT instruction, we actually have to insert the diamond
  // control-flow pattern. The incoming instruction knows the destination vreg
  // to set, the condition code register to branch on, the true/false values to
  // select between, and a branch opcode to use.
  const BasicBlock *LLVM_BB = BB->getBasicBlock();
  MachineFunction::iterator I = ++BB->getIterator();

  // ThisMBB:
  // ...
  //  TrueVal = ...
  //  jmp_XX r1, r2 goto Copy1MBB
  //  fallthrough --> Copy0MBB
  MachineBasicBlock *ThisMBB = BB;
  MachineFunction *F = BB->getParent();
  MachineBasicBlock *Copy0MBB = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *Copy1MBB = F->CreateMachineBasicBlock(LLVM_BB);

  F->insert(I, Copy0MBB);
  F->insert(I, Copy1MBB);
  // Update machine-CFG edges by transferring all successors of the current
  // block to the new block which will contain the Phi node for the select.
  Copy1MBB->splice(Copy1MBB->begin(), BB,
                   std::next(MachineBasicBlock::iterator(MI)), BB->end());
  Copy1MBB->transferSuccessorsAndUpdatePHIs(BB);
  // Next, add the true and fallthrough blocks as its successors.
  BB->addSuccessor(Copy0MBB);
  BB->addSuccessor(Copy1MBB);

  // Insert Branch if Flag.
  int CC = MI.getOperand(3).getImm();
  int NewCC;
  switch (CC) {
#define SET_NEWCC(X, Y) \
  case ISD::X: \
    if (is32BitCmp && HasJmp32) \
      NewCC = isSelectRROp ? BPF::Y##_rr_32 : BPF::Y##_ri_32; \
    else \
      NewCC = isSelectRROp ? BPF::Y##_rr : BPF::Y##_ri; \
    break
  SET_NEWCC(SETGT, JSGT);
  SET_NEWCC(SETUGT, JUGT);
  SET_NEWCC(SETGE, JSGE);
  SET_NEWCC(SETUGE, JUGE);
  SET_NEWCC(SETEQ, JEQ);
  SET_NEWCC(SETNE, JNE);
  SET_NEWCC(SETLT, JSLT);
  SET_NEWCC(SETULT, JULT);
  SET_NEWCC(SETLE, JSLE);
  SET_NEWCC(SETULE, JULE);
  default:
    report_fatal_error("unimplemented select CondCode " + Twine(CC));
  }

  Register LHS = MI.getOperand(1).getReg();
  bool isSignedCmp = (CC == ISD::SETGT ||
                      CC == ISD::SETGE ||
                      CC == ISD::SETLT ||
                      CC == ISD::SETLE);

  // eBPF without the jmp32 feature only has 64-bit comparisons. Any 32-bit
  // comparison then needs to be promoted; however, if the 32-bit comparison
  // operands are destination registers, they are implicitly zero-extended
  // already and no explicit zero-extend sequence is needed for them.
  //
  // We simply do the extension for all situations in this method, and try to
  // remove the unnecessary ones in the BPFMIPeephole pass.
  if (is32BitCmp && !HasJmp32)
    LHS = EmitSubregExt(MI, BB, LHS, isSignedCmp);

  if (isSelectRROp) {
    Register RHS = MI.getOperand(2).getReg();

    if (is32BitCmp && !HasJmp32)
      RHS = EmitSubregExt(MI, BB, RHS, isSignedCmp);

    BuildMI(BB, DL, TII.get(NewCC)).addReg(LHS).addReg(RHS).addMBB(Copy1MBB);
  } else {
    int64_t imm32 = MI.getOperand(2).getImm();
    // Check before we build the J*_ri instruction.
    if (!isInt<32>(imm32))
      report_fatal_error("immediate overflows 32 bits: " + Twine(imm32));
    BuildMI(BB, DL, TII.get(NewCC))
        .addReg(LHS).addImm(imm32).addMBB(Copy1MBB);
  }

  // Copy0MBB:
  //  %FalseValue = ...
  //  # fallthrough to Copy1MBB
  BB = Copy0MBB;

  // Update machine-CFG edges.
  BB->addSuccessor(Copy1MBB);

  // Copy1MBB:
  //  %Result = phi [ %FalseValue, Copy0MBB ], [ %TrueValue, ThisMBB ]
  //  ...
  BB = Copy1MBB;
  BuildMI(*BB, BB->begin(), DL, TII.get(BPF::PHI), MI.getOperand(0).getReg())
      .addReg(MI.getOperand(5).getReg())
      .addMBB(Copy0MBB)
      .addReg(MI.getOperand(4).getReg())
      .addMBB(ThisMBB);

  MI.eraseFromParent(); // The pseudo instruction is gone now.
  return BB;
}

EVT BPFTargetLowering::getSetCCResultType(const DataLayout &, LLVMContext &,
                                          EVT VT) const {
  return getHasAlu32() ? MVT::i32 : MVT::i64;
}

MVT BPFTargetLowering::getScalarShiftAmountTy(const DataLayout &DL,
                                              EVT VT) const {
  return (getHasAlu32() && VT == MVT::i32) ? MVT::i32 : MVT::i64;
}

bool BPFTargetLowering::isLegalAddressingMode(const DataLayout &DL,
                                              const AddrMode &AM, Type *Ty,
                                              unsigned AS,
                                              Instruction *I) const {
  // No global is ever allowed as a base.
  if (AM.BaseGV)
    return false;

  switch (AM.Scale) {
  case 0: // "r+i" or just "i", depending on HasBaseReg.
    break;
  case 1:
    if (!AM.HasBaseReg) // allow "r+i".
      break;
    return false; // disallow "r+r" or "r+r+i".
  default:
    return false;
  }

  return true;
}