1//===- RISCVGatherScatterLowering.cpp - Gather/Scatter lowering -----------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
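// This pass custom lowers llvm.masked.gather, llvm.masked.scatter,
// llvm.vp.gather and llvm.vp.scatter intrinsics whose addresses are strided
// into llvm.experimental.vp.strided.load/store intrinsics.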
11//
12//===----------------------------------------------------------------------===//
13
14#include "RISCV.h"
15#include "RISCVTargetMachine.h"
16#include "llvm/Analysis/InstSimplifyFolder.h"
17#include "llvm/Analysis/LoopInfo.h"
18#include "llvm/Analysis/ValueTracking.h"
19#include "llvm/Analysis/VectorUtils.h"
20#include "llvm/CodeGen/TargetPassConfig.h"
21#include "llvm/IR/GetElementPtrTypeIterator.h"
22#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/IntrinsicInst.h"
24#include "llvm/IR/PatternMatch.h"
25#include "llvm/Transforms/Utils/Local.h"
26#include <optional>
27
28using namespace llvm;
29using namespace PatternMatch;
30
31#define DEBUG_TYPE "riscv-gather-scatter-lowering"
32
33namespace {
34
35class RISCVGatherScatterLowering : public FunctionPass {
36 const RISCVSubtarget *ST = nullptr;
37 const RISCVTargetLowering *TLI = nullptr;
38 LoopInfo *LI = nullptr;
39 const DataLayout *DL = nullptr;
40
41 SmallVector<WeakTrackingVH> MaybeDeadPHIs;
42
43 // Cache of the BasePtr and Stride determined from this GEP. When a GEP is
44 // used by multiple gathers/scatters, this allow us to reuse the scalar
45 // instructions we created for the first gather/scatter for the others.
46 DenseMap<GetElementPtrInst *, std::pair<Value *, Value *>> StridedAddrs;
47
48public:
49 static char ID; // Pass identification, replacement for typeid
50
51 RISCVGatherScatterLowering() : FunctionPass(ID) {}
52
53 bool runOnFunction(Function &F) override;
54
55 void getAnalysisUsage(AnalysisUsage &AU) const override {
56 AU.setPreservesCFG();
57 AU.addRequired<TargetPassConfig>();
58 AU.addRequired<LoopInfoWrapperPass>();
59 }
60
61 StringRef getPassName() const override {
62 return "RISC-V gather/scatter lowering";
63 }
64
65private:
66 bool tryCreateStridedLoadStore(IntrinsicInst *II);
67
68 std::pair<Value *, Value *> determineBaseAndStride(Instruction *Ptr,
69 IRBuilderBase &Builder);
70
71 bool matchStridedRecurrence(Value *Index, Loop *L, Value *&Stride,
72 PHINode *&BasePtr, BinaryOperator *&Inc,
73 IRBuilderBase &Builder);
74};
75
76} // end anonymous namespace
77
78char RISCVGatherScatterLowering::ID = 0;
79
80INITIALIZE_PASS(RISCVGatherScatterLowering, DEBUG_TYPE,
81 "RISC-V gather/scatter lowering pass", false, false)
82
83FunctionPass *llvm::createRISCVGatherScatterLoweringPass() {
84 return new RISCVGatherScatterLowering();
85}
86
87// TODO: Should we consider the mask when looking for a stride?
88static std::pair<Value *, Value *> matchStridedConstant(Constant *StartC) {
89 if (!isa<FixedVectorType>(Val: StartC->getType()))
90 return std::make_pair(x: nullptr, y: nullptr);
91
92 unsigned NumElts = cast<FixedVectorType>(Val: StartC->getType())->getNumElements();
93
94 // Check that the start value is a strided constant.
95 auto *StartVal =
96 dyn_cast_or_null<ConstantInt>(Val: StartC->getAggregateElement(Elt: (unsigned)0));
97 if (!StartVal)
98 return std::make_pair(x: nullptr, y: nullptr);
99 APInt StrideVal(StartVal->getValue().getBitWidth(), 0);
100 ConstantInt *Prev = StartVal;
101 for (unsigned i = 1; i != NumElts; ++i) {
102 auto *C = dyn_cast_or_null<ConstantInt>(Val: StartC->getAggregateElement(Elt: i));
103 if (!C)
104 return std::make_pair(x: nullptr, y: nullptr);
105
106 APInt LocalStride = C->getValue() - Prev->getValue();
107 if (i == 1)
108 StrideVal = LocalStride;
109 else if (StrideVal != LocalStride)
110 return std::make_pair(x: nullptr, y: nullptr);
111
112 Prev = C;
113 }
114
115 Value *Stride = ConstantInt::get(Ty: StartVal->getType(), V: StrideVal);
116
117 return std::make_pair(x&: StartVal, y&: Stride);
118}
119
120static std::pair<Value *, Value *> matchStridedStart(Value *Start,
121 IRBuilderBase &Builder) {
122 // Base case, start is a strided constant.
123 auto *StartC = dyn_cast<Constant>(Val: Start);
124 if (StartC)
125 return matchStridedConstant(StartC);
126
127 // Base case, start is a stepvector
128 if (match(V: Start, P: m_Intrinsic<Intrinsic::stepvector>())) {
129 auto *Ty = Start->getType()->getScalarType();
130 return std::make_pair(x: ConstantInt::get(Ty, V: 0), y: ConstantInt::get(Ty, V: 1));
131 }
132
133 // Not a constant, maybe it's a strided constant with a splat added or
134 // multiplied.
135 auto *BO = dyn_cast<BinaryOperator>(Val: Start);
136 if (!BO || (BO->getOpcode() != Instruction::Add &&
137 BO->getOpcode() != Instruction::Or &&
138 BO->getOpcode() != Instruction::Shl &&
139 BO->getOpcode() != Instruction::Mul))
140 return std::make_pair(x: nullptr, y: nullptr);
141
142 if (BO->getOpcode() == Instruction::Or &&
143 !cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
144 return std::make_pair(x: nullptr, y: nullptr);
145
146 // Look for an operand that is splatted.
147 unsigned OtherIndex = 0;
148 Value *Splat = getSplatValue(V: BO->getOperand(i_nocapture: 1));
149 if (!Splat && Instruction::isCommutative(Opcode: BO->getOpcode())) {
150 Splat = getSplatValue(V: BO->getOperand(i_nocapture: 0));
151 OtherIndex = 1;
152 }
153 if (!Splat)
154 return std::make_pair(x: nullptr, y: nullptr);
155
156 Value *Stride;
157 std::tie(args&: Start, args&: Stride) = matchStridedStart(Start: BO->getOperand(i_nocapture: OtherIndex),
158 Builder);
159 if (!Start)
160 return std::make_pair(x: nullptr, y: nullptr);
161
162 Builder.SetInsertPoint(BO);
163 Builder.SetCurrentDebugLocation(DebugLoc());
164 // Add the splat value to the start or multiply the start and stride by the
165 // splat.
166 switch (BO->getOpcode()) {
167 default:
168 llvm_unreachable("Unexpected opcode");
169 case Instruction::Or:
170 Start = Builder.CreateOr(LHS: Start, RHS: Splat, Name: "", /*IsDisjoint=*/true);
171 break;
172 case Instruction::Add:
173 Start = Builder.CreateAdd(LHS: Start, RHS: Splat);
174 break;
175 case Instruction::Mul:
176 Start = Builder.CreateMul(LHS: Start, RHS: Splat);
177 Stride = Builder.CreateMul(LHS: Stride, RHS: Splat);
178 break;
179 case Instruction::Shl:
180 Start = Builder.CreateShl(LHS: Start, RHS: Splat);
181 Stride = Builder.CreateShl(LHS: Stride, RHS: Splat);
182 break;
183 }
184
185 return std::make_pair(x&: Start, y&: Stride);
186}
187
188// Recursively, walk about the use-def chain until we find a Phi with a strided
189// start value. Build and update a scalar recurrence as we unwind the recursion.
190// We also update the Stride as we unwind. Our goal is to move all of the
191// arithmetic out of the loop.
192bool RISCVGatherScatterLowering::matchStridedRecurrence(Value *Index, Loop *L,
193 Value *&Stride,
194 PHINode *&BasePtr,
195 BinaryOperator *&Inc,
196 IRBuilderBase &Builder) {
197 // Our base case is a Phi.
198 if (auto *Phi = dyn_cast<PHINode>(Val: Index)) {
199 // A phi node we want to perform this function on should be from the
200 // loop header.
201 if (Phi->getParent() != L->getHeader())
202 return false;
203
204 Value *Step, *Start;
205 if (!matchSimpleRecurrence(P: Phi, BO&: Inc, Start, Step) ||
206 Inc->getOpcode() != Instruction::Add)
207 return false;
208 assert(Phi->getNumIncomingValues() == 2 && "Expected 2 operand phi.");
209 unsigned IncrementingBlock = Phi->getIncomingValue(i: 0) == Inc ? 0 : 1;
210 assert(Phi->getIncomingValue(IncrementingBlock) == Inc &&
211 "Expected one operand of phi to be Inc");
212
213 // Step should be a splat.
214 Step = getSplatValue(V: Step);
215 if (!Step)
216 return false;
217
218 std::tie(args&: Start, args&: Stride) = matchStridedStart(Start, Builder);
219 if (!Start)
220 return false;
221 assert(Stride != nullptr);
222
223 // Build scalar phi and increment.
224 BasePtr =
225 PHINode::Create(Ty: Start->getType(), NumReservedValues: 2, NameStr: Phi->getName() + ".scalar", InsertBefore: Phi->getIterator());
226 Inc = BinaryOperator::CreateAdd(V1: BasePtr, V2: Step, Name: Inc->getName() + ".scalar",
227 InsertBefore: Inc->getIterator());
228 BasePtr->addIncoming(V: Start, BB: Phi->getIncomingBlock(i: 1 - IncrementingBlock));
229 BasePtr->addIncoming(V: Inc, BB: Phi->getIncomingBlock(i: IncrementingBlock));
230
231 // Note that this Phi might be eligible for removal.
232 MaybeDeadPHIs.push_back(Elt: Phi);
233 return true;
234 }
235
236 // Otherwise look for binary operator.
237 auto *BO = dyn_cast<BinaryOperator>(Val: Index);
238 if (!BO)
239 return false;
240
241 switch (BO->getOpcode()) {
242 default:
243 return false;
244 case Instruction::Or:
245 // We need to be able to treat Or as Add.
246 if (!cast<PossiblyDisjointInst>(Val: BO)->isDisjoint())
247 return false;
248 break;
249 case Instruction::Add:
250 break;
251 case Instruction::Shl:
252 break;
253 case Instruction::Mul:
254 break;
255 }
256
257 // We should have one operand in the loop and one splat.
258 Value *OtherOp;
259 if (isa<Instruction>(Val: BO->getOperand(i_nocapture: 0)) &&
260 L->contains(Inst: cast<Instruction>(Val: BO->getOperand(i_nocapture: 0)))) {
261 Index = cast<Instruction>(Val: BO->getOperand(i_nocapture: 0));
262 OtherOp = BO->getOperand(i_nocapture: 1);
263 } else if (isa<Instruction>(Val: BO->getOperand(i_nocapture: 1)) &&
264 L->contains(Inst: cast<Instruction>(Val: BO->getOperand(i_nocapture: 1))) &&
265 Instruction::isCommutative(Opcode: BO->getOpcode())) {
266 Index = cast<Instruction>(Val: BO->getOperand(i_nocapture: 1));
267 OtherOp = BO->getOperand(i_nocapture: 0);
268 } else {
269 return false;
270 }
271
272 // Make sure other op is loop invariant.
273 if (!L->isLoopInvariant(V: OtherOp))
274 return false;
275
276 // Make sure we have a splat.
277 Value *SplatOp = getSplatValue(V: OtherOp);
278 if (!SplatOp)
279 return false;
280
281 // Recurse up the use-def chain.
282 if (!matchStridedRecurrence(Index, L, Stride, BasePtr, Inc, Builder))
283 return false;
284
285 // Locate the Step and Start values from the recurrence.
286 unsigned StepIndex = Inc->getOperand(i_nocapture: 0) == BasePtr ? 1 : 0;
287 unsigned StartBlock = BasePtr->getOperand(i_nocapture: 0) == Inc ? 1 : 0;
288 Value *Step = Inc->getOperand(i_nocapture: StepIndex);
289 Value *Start = BasePtr->getOperand(i_nocapture: StartBlock);
290
291 // We need to adjust the start value in the preheader.
292 Builder.SetInsertPoint(
293 BasePtr->getIncomingBlock(i: StartBlock)->getTerminator());
294 Builder.SetCurrentDebugLocation(DebugLoc());
295
296 // TODO: Share this switch with matchStridedStart?
297 switch (BO->getOpcode()) {
298 default:
299 llvm_unreachable("Unexpected opcode!");
300 case Instruction::Add:
301 case Instruction::Or: {
302 // An add only affects the start value. It's ok to do this for Or because
303 // we already checked that there are no common set bits.
304 Start = Builder.CreateAdd(LHS: Start, RHS: SplatOp, Name: "start");
305 break;
306 }
307 case Instruction::Mul: {
308 Start = Builder.CreateMul(LHS: Start, RHS: SplatOp, Name: "start");
309 Stride = Builder.CreateMul(LHS: Stride, RHS: SplatOp, Name: "stride");
310 break;
311 }
312 case Instruction::Shl: {
313 Start = Builder.CreateShl(LHS: Start, RHS: SplatOp, Name: "start");
314 Stride = Builder.CreateShl(LHS: Stride, RHS: SplatOp, Name: "stride");
315 break;
316 }
317 }
318
319 // If the Step was defined inside the loop, adjust it before its definition
320 // instead of in the preheader.
321 if (auto *StepI = dyn_cast<Instruction>(Val: Step); StepI && L->contains(Inst: StepI))
322 Builder.SetInsertPoint(*StepI->getInsertionPointAfterDef());
323
324 switch (BO->getOpcode()) {
325 default:
326 break;
327 case Instruction::Mul:
328 Step = Builder.CreateMul(LHS: Step, RHS: SplatOp, Name: "step");
329 break;
330 case Instruction::Shl:
331 Step = Builder.CreateShl(LHS: Step, RHS: SplatOp, Name: "step");
332 break;
333 }
334
335 Inc->setOperand(i_nocapture: StepIndex, Val_nocapture: Step);
336 BasePtr->setIncomingValue(i: StartBlock, V: Start);
337 return true;
338}
339
340std::pair<Value *, Value *>
341RISCVGatherScatterLowering::determineBaseAndStride(Instruction *Ptr,
342 IRBuilderBase &Builder) {
343
344 // A gather/scatter of a splat is a zero strided load/store.
345 if (auto *BasePtr = getSplatValue(V: Ptr)) {
346 Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
347 return std::make_pair(x&: BasePtr, y: ConstantInt::get(Ty: IntPtrTy, V: 0));
348 }
349
350 auto *GEP = dyn_cast<GetElementPtrInst>(Val: Ptr);
351 if (!GEP)
352 return std::make_pair(x: nullptr, y: nullptr);
353
354 auto I = StridedAddrs.find(Val: GEP);
355 if (I != StridedAddrs.end())
356 return I->second;
357
358 SmallVector<Value *, 2> Ops(GEP->operands());
359
360 // If the base pointer is a vector, check if it's strided.
361 Value *Base = GEP->getPointerOperand();
362 if (auto *BaseInst = dyn_cast<Instruction>(Val: Base);
363 BaseInst && BaseInst->getType()->isVectorTy()) {
364 // If GEP's offset is scalar then we can add it to the base pointer's base.
365 auto IsScalar = [](Value *Idx) { return !Idx->getType()->isVectorTy(); };
366 if (all_of(Range: GEP->indices(), P: IsScalar)) {
367 auto [BaseBase, Stride] = determineBaseAndStride(Ptr: BaseInst, Builder);
368 if (BaseBase) {
369 Builder.SetInsertPoint(GEP);
370 SmallVector<Value *> Indices(GEP->indices());
371 Value *OffsetBase =
372 Builder.CreateGEP(Ty: GEP->getSourceElementType(), Ptr: BaseBase, IdxList: Indices,
373 Name: GEP->getName() + "offset", NW: GEP->isInBounds());
374 return {OffsetBase, Stride};
375 }
376 }
377 }
378
379 // Base pointer needs to be a scalar.
380 Value *ScalarBase = Base;
381 if (ScalarBase->getType()->isVectorTy()) {
382 ScalarBase = getSplatValue(V: ScalarBase);
383 if (!ScalarBase)
384 return std::make_pair(x: nullptr, y: nullptr);
385 }
386
387 std::optional<unsigned> VecOperand;
388 unsigned TypeScale = 0;
389
390 // Look for a vector operand and scale.
391 gep_type_iterator GTI = gep_type_begin(GEP);
392 for (unsigned i = 1, e = GEP->getNumOperands(); i != e; ++i, ++GTI) {
393 if (!Ops[i]->getType()->isVectorTy())
394 continue;
395
396 if (VecOperand)
397 return std::make_pair(x: nullptr, y: nullptr);
398
399 VecOperand = i;
400
401 TypeSize TS = GTI.getSequentialElementStride(DL: *DL);
402 if (TS.isScalable())
403 return std::make_pair(x: nullptr, y: nullptr);
404
405 TypeScale = TS.getFixedValue();
406 }
407
408 // We need to find a vector index to simplify.
409 if (!VecOperand)
410 return std::make_pair(x: nullptr, y: nullptr);
411
412 // We can't extract the stride if the arithmetic is done at a different size
413 // than the pointer type. Adding the stride later may not wrap correctly.
414 // Technically we could handle wider indices, but I don't expect that in
415 // practice. Handle one special case here - constants. This simplifies
416 // writing test cases.
417 Value *VecIndex = Ops[*VecOperand];
418 Type *VecIntPtrTy = DL->getIntPtrType(GEP->getType());
419 if (VecIndex->getType() != VecIntPtrTy) {
420 auto *VecIndexC = dyn_cast<Constant>(Val: VecIndex);
421 if (!VecIndexC)
422 return std::make_pair(x: nullptr, y: nullptr);
423 if (VecIndex->getType()->getScalarSizeInBits() > VecIntPtrTy->getScalarSizeInBits())
424 VecIndex = ConstantFoldCastInstruction(opcode: Instruction::Trunc, V: VecIndexC, DestTy: VecIntPtrTy);
425 else
426 VecIndex = ConstantFoldCastInstruction(opcode: Instruction::SExt, V: VecIndexC, DestTy: VecIntPtrTy);
427 }
428
429 // Handle the non-recursive case. This is what we see if the vectorizer
430 // decides to use a scalar IV + vid on demand instead of a vector IV.
431 auto [Start, Stride] = matchStridedStart(Start: VecIndex, Builder);
432 if (Start) {
433 assert(Stride);
434 Builder.SetInsertPoint(GEP);
435
436 // Replace the vector index with the scalar start and build a scalar GEP.
437 Ops[*VecOperand] = Start;
438 Type *SourceTy = GEP->getSourceElementType();
439 Value *BasePtr =
440 Builder.CreateGEP(Ty: SourceTy, Ptr: ScalarBase, IdxList: ArrayRef(Ops).drop_front());
441
442 // Convert stride to pointer size if needed.
443 Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
444 assert(Stride->getType() == IntPtrTy && "Unexpected type");
445
446 // Scale the stride by the size of the indexed type.
447 if (TypeScale != 1)
448 Stride = Builder.CreateMul(LHS: Stride, RHS: ConstantInt::get(Ty: IntPtrTy, V: TypeScale));
449
450 auto P = std::make_pair(x&: BasePtr, y&: Stride);
451 StridedAddrs[GEP] = P;
452 return P;
453 }
454
455 // Make sure we're in a loop and that has a pre-header and a single latch.
456 Loop *L = LI->getLoopFor(BB: GEP->getParent());
457 if (!L || !L->getLoopPreheader() || !L->getLoopLatch())
458 return std::make_pair(x: nullptr, y: nullptr);
459
460 BinaryOperator *Inc;
461 PHINode *BasePhi;
462 if (!matchStridedRecurrence(Index: VecIndex, L, Stride, BasePtr&: BasePhi, Inc, Builder))
463 return std::make_pair(x: nullptr, y: nullptr);
464
465 assert(BasePhi->getNumIncomingValues() == 2 && "Expected 2 operand phi.");
466 unsigned IncrementingBlock = BasePhi->getOperand(i_nocapture: 0) == Inc ? 0 : 1;
467 assert(BasePhi->getIncomingValue(IncrementingBlock) == Inc &&
468 "Expected one operand of phi to be Inc");
469
470 Builder.SetInsertPoint(GEP);
471
472 // Replace the vector index with the scalar phi and build a scalar GEP.
473 Ops[*VecOperand] = BasePhi;
474 Type *SourceTy = GEP->getSourceElementType();
475 Value *BasePtr =
476 Builder.CreateGEP(Ty: SourceTy, Ptr: ScalarBase, IdxList: ArrayRef(Ops).drop_front());
477
478 // Final adjustments to stride should go in the start block.
479 Builder.SetInsertPoint(
480 BasePhi->getIncomingBlock(i: 1 - IncrementingBlock)->getTerminator());
481
482 // Convert stride to pointer size if needed.
483 Type *IntPtrTy = DL->getIntPtrType(BasePtr->getType());
484 assert(Stride->getType() == IntPtrTy && "Unexpected type");
485
486 // Scale the stride by the size of the indexed type.
487 if (TypeScale != 1)
488 Stride = Builder.CreateMul(LHS: Stride, RHS: ConstantInt::get(Ty: IntPtrTy, V: TypeScale));
489
490 auto P = std::make_pair(x&: BasePtr, y&: Stride);
491 StridedAddrs[GEP] = P;
492 return P;
493}
494
495bool RISCVGatherScatterLowering::tryCreateStridedLoadStore(IntrinsicInst *II) {
496 VectorType *DataType;
497 Value *StoreVal = nullptr, *Ptr, *Mask, *EVL = nullptr;
498 Align Alignment;
499 switch (II->getIntrinsicID()) {
500 case Intrinsic::masked_gather:
501 DataType = cast<VectorType>(Val: II->getType());
502 Ptr = II->getArgOperand(i: 0);
503 Alignment = II->getParamAlign(ArgNo: 0).valueOrOne();
504 Mask = II->getArgOperand(i: 1);
505 break;
506 case Intrinsic::vp_gather:
507 DataType = cast<VectorType>(Val: II->getType());
508 Ptr = II->getArgOperand(i: 0);
509 // FIXME: Falling back to ABI alignment is incorrect.
510 Alignment = II->getParamAlign(ArgNo: 0).value_or(
511 u: DL->getABITypeAlign(Ty: DataType->getElementType()));
512 Mask = II->getArgOperand(i: 1);
513 EVL = II->getArgOperand(i: 2);
514 break;
515 case Intrinsic::masked_scatter:
516 DataType = cast<VectorType>(Val: II->getArgOperand(i: 0)->getType());
517 StoreVal = II->getArgOperand(i: 0);
518 Ptr = II->getArgOperand(i: 1);
519 Alignment = II->getParamAlign(ArgNo: 1).valueOrOne();
520 Mask = II->getArgOperand(i: 2);
521 break;
522 case Intrinsic::vp_scatter:
523 DataType = cast<VectorType>(Val: II->getArgOperand(i: 0)->getType());
524 StoreVal = II->getArgOperand(i: 0);
525 Ptr = II->getArgOperand(i: 1);
526 // FIXME: Falling back to ABI alignment is incorrect.
527 Alignment = II->getParamAlign(ArgNo: 1).value_or(
528 u: DL->getABITypeAlign(Ty: DataType->getElementType()));
529 Mask = II->getArgOperand(i: 2);
530 EVL = II->getArgOperand(i: 3);
531 break;
532 default:
533 llvm_unreachable("Unexpected intrinsic");
534 }
535
536 // Make sure the operation will be supported by the backend.
537 EVT DataTypeVT = TLI->getValueType(DL: *DL, Ty: DataType);
538 if (!TLI->isLegalStridedLoadStore(DataType: DataTypeVT, Alignment))
539 return false;
540
541 // FIXME: Let the backend type legalize by splitting/widening?
542 if (!TLI->isTypeLegal(VT: DataTypeVT))
543 return false;
544
545 // Pointer should be an instruction.
546 auto *PtrI = dyn_cast<Instruction>(Val: Ptr);
547 if (!PtrI)
548 return false;
549
550 LLVMContext &Ctx = PtrI->getContext();
551 IRBuilder Builder(Ctx, InstSimplifyFolder(*DL));
552 Builder.SetInsertPoint(PtrI);
553
554 Value *BasePtr, *Stride;
555 std::tie(args&: BasePtr, args&: Stride) = determineBaseAndStride(Ptr: PtrI, Builder);
556 if (!BasePtr)
557 return false;
558 assert(Stride != nullptr);
559
560 Builder.SetInsertPoint(II);
561
562 if (!EVL)
563 EVL = Builder.CreateElementCount(
564 Ty: Builder.getInt32Ty(), EC: cast<VectorType>(Val: DataType)->getElementCount());
565
566 Value *Call;
567
568 if (!StoreVal) {
569 Call = Builder.CreateIntrinsic(
570 ID: Intrinsic::experimental_vp_strided_load,
571 Types: {DataType, BasePtr->getType(), Stride->getType()},
572 Args: {BasePtr, Stride, Mask, EVL});
573
574 // Merge llvm.masked.gather's passthru
575 if (II->getIntrinsicID() == Intrinsic::masked_gather)
576 Call = Builder.CreateSelect(C: Mask, True: Call, False: II->getArgOperand(i: 2));
577 } else
578 Call = Builder.CreateIntrinsic(
579 ID: Intrinsic::experimental_vp_strided_store,
580 Types: {DataType, BasePtr->getType(), Stride->getType()},
581 Args: {StoreVal, BasePtr, Stride, Mask, EVL});
582
583 Call->takeName(V: II);
584 II->replaceAllUsesWith(V: Call);
585 II->eraseFromParent();
586
587 if (PtrI->use_empty())
588 RecursivelyDeleteTriviallyDeadInstructions(V: PtrI);
589
590 return true;
591}
592
593bool RISCVGatherScatterLowering::runOnFunction(Function &F) {
594 if (skipFunction(F))
595 return false;
596
597 auto &TPC = getAnalysis<TargetPassConfig>();
598 auto &TM = TPC.getTM<RISCVTargetMachine>();
599 ST = &TM.getSubtarget<RISCVSubtarget>(F);
600 if (!ST->hasVInstructions() || !ST->useRVVForFixedLengthVectors())
601 return false;
602
603 TLI = ST->getTargetLowering();
604 DL = &F.getDataLayout();
605 LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
606
607 StridedAddrs.clear();
608
609 SmallVector<IntrinsicInst *, 4> Worklist;
610
611 bool Changed = false;
612
613 for (BasicBlock &BB : F) {
614 for (Instruction &I : BB) {
615 IntrinsicInst *II = dyn_cast<IntrinsicInst>(Val: &I);
616 if (!II)
617 continue;
618 switch (II->getIntrinsicID()) {
619 case Intrinsic::masked_gather:
620 case Intrinsic::masked_scatter:
621 case Intrinsic::vp_gather:
622 case Intrinsic::vp_scatter:
623 Worklist.push_back(Elt: II);
624 break;
625 default:
626 break;
627 }
628 }
629 }
630
631 // Rewrite gather/scatter to form strided load/store if possible.
632 for (auto *II : Worklist)
633 Changed |= tryCreateStridedLoadStore(II);
634
635 // Remove any dead phis.
636 while (!MaybeDeadPHIs.empty()) {
637 if (auto *Phi = dyn_cast_or_null<PHINode>(Val: MaybeDeadPHIs.pop_back_val()))
638 RecursivelyDeleteDeadPHINode(PN: Phi);
639 }
640
641 return Changed;
642}
643