1//===-- RISCVInterleavedAccess.cpp - RISC-V Interleaved Access Transform --===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// Functions and callbacks related to the InterleavedAccessPass.
10//
11//===----------------------------------------------------------------------===//
12
13#include "RISCV.h"
14#include "RISCVISelLowering.h"
15#include "RISCVSubtarget.h"
16#include "llvm/Analysis/ValueTracking.h"
17#include "llvm/Analysis/VectorUtils.h"
18#include "llvm/CodeGen/ValueTypes.h"
19#include "llvm/IR/IRBuilder.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/IntrinsicsRISCV.h"
22#include "llvm/IR/Module.h"
23#include "llvm/IR/PatternMatch.h"
24
25using namespace llvm;
26
27bool RISCVTargetLowering::isLegalInterleavedAccessType(
28 VectorType *VTy, unsigned Factor, Align Alignment, unsigned AddrSpace,
29 const DataLayout &DL) const {
30 EVT VT = getValueType(DL, Ty: VTy);
31 // Don't lower vlseg/vsseg for vector types that can't be split.
32 if (!isTypeLegal(VT))
33 return false;
34
35 if (!isLegalElementTypeForRVV(ScalarTy: VT.getScalarType()) ||
36 !allowsMemoryAccessForAlignment(Context&: VTy->getContext(), DL, VT, AddrSpace,
37 Alignment))
38 return false;
39
40 MVT ContainerVT = VT.getSimpleVT();
41
42 if (auto *FVTy = dyn_cast<FixedVectorType>(Val: VTy)) {
43 if (!Subtarget.useRVVForFixedLengthVectors())
44 return false;
45 // Sometimes the interleaved access pass picks up splats as interleaves of
46 // one element. Don't lower these.
47 if (FVTy->getNumElements() < 2)
48 return false;
49
50 ContainerVT = getContainerForFixedLengthVector(VT: VT.getSimpleVT());
51 }
52
53 // Need to make sure that EMUL * NFIELDS ≤ 8
54 auto [LMUL, Fractional] = RISCVVType::decodeVLMUL(VLMul: getLMUL(VT: ContainerVT));
55 if (Fractional)
56 return true;
57 return Factor * LMUL <= 8;
58}
59
60static const Intrinsic::ID FixedVlsegIntrIds[] = {
61 Intrinsic::riscv_seg2_load_mask, Intrinsic::riscv_seg3_load_mask,
62 Intrinsic::riscv_seg4_load_mask, Intrinsic::riscv_seg5_load_mask,
63 Intrinsic::riscv_seg6_load_mask, Intrinsic::riscv_seg7_load_mask,
64 Intrinsic::riscv_seg8_load_mask};
65
66static const Intrinsic::ID FixedVlssegIntrIds[] = {
67 Intrinsic::riscv_sseg2_load_mask, Intrinsic::riscv_sseg3_load_mask,
68 Intrinsic::riscv_sseg4_load_mask, Intrinsic::riscv_sseg5_load_mask,
69 Intrinsic::riscv_sseg6_load_mask, Intrinsic::riscv_sseg7_load_mask,
70 Intrinsic::riscv_sseg8_load_mask};
71
72static const Intrinsic::ID ScalableVlssegIntrIds[] = {
73 Intrinsic::riscv_vlsseg2_mask, Intrinsic::riscv_vlsseg3_mask,
74 Intrinsic::riscv_vlsseg4_mask, Intrinsic::riscv_vlsseg5_mask,
75 Intrinsic::riscv_vlsseg6_mask, Intrinsic::riscv_vlsseg7_mask,
76 Intrinsic::riscv_vlsseg8_mask};
77
78static const Intrinsic::ID ScalableVlsegIntrIds[] = {
79 Intrinsic::riscv_vlseg2_mask, Intrinsic::riscv_vlseg3_mask,
80 Intrinsic::riscv_vlseg4_mask, Intrinsic::riscv_vlseg5_mask,
81 Intrinsic::riscv_vlseg6_mask, Intrinsic::riscv_vlseg7_mask,
82 Intrinsic::riscv_vlseg8_mask};
83
84static const Intrinsic::ID FixedVssegIntrIds[] = {
85 Intrinsic::riscv_seg2_store_mask, Intrinsic::riscv_seg3_store_mask,
86 Intrinsic::riscv_seg4_store_mask, Intrinsic::riscv_seg5_store_mask,
87 Intrinsic::riscv_seg6_store_mask, Intrinsic::riscv_seg7_store_mask,
88 Intrinsic::riscv_seg8_store_mask};
89
90static const Intrinsic::ID FixedVsssegIntrIds[] = {
91 Intrinsic::riscv_sseg2_store_mask, Intrinsic::riscv_sseg3_store_mask,
92 Intrinsic::riscv_sseg4_store_mask, Intrinsic::riscv_sseg5_store_mask,
93 Intrinsic::riscv_sseg6_store_mask, Intrinsic::riscv_sseg7_store_mask,
94 Intrinsic::riscv_sseg8_store_mask};
95
96static const Intrinsic::ID ScalableVssegIntrIds[] = {
97 Intrinsic::riscv_vsseg2_mask, Intrinsic::riscv_vsseg3_mask,
98 Intrinsic::riscv_vsseg4_mask, Intrinsic::riscv_vsseg5_mask,
99 Intrinsic::riscv_vsseg6_mask, Intrinsic::riscv_vsseg7_mask,
100 Intrinsic::riscv_vsseg8_mask};
101
102static bool isMultipleOfN(const Value *V, const DataLayout &DL, unsigned N) {
103 assert(N);
104 if (N == 1)
105 return true;
106
107 using namespace PatternMatch;
108 // Right now we're only recognizing the simplest pattern.
109 uint64_t C;
110 if (match(V, P: m_CombineOr(Ps: m_ConstantInt(V&: C),
111 Ps: m_NUWMul(L: m_Value(), R: m_ConstantInt(V&: C)))) &&
112 C && C % N == 0)
113 return true;
114
115 if (isPowerOf2_32(Value: N)) {
116 KnownBits KB = llvm::computeKnownBits(V, DL);
117 return KB.countMinTrailingZeros() >= Log2_32(Value: N);
118 }
119
120 return false;
121}
122
123/// Do the common operand retrieval and validition required by the
124/// routines below.
125static bool getMemOperands(unsigned Factor, VectorType *VTy, Type *XLenTy,
126 Instruction *I, Value *&Ptr, Value *&Mask,
127 Value *&VL, Align &Alignment) {
128
129 IRBuilder<> Builder(I);
130 const DataLayout &DL = I->getDataLayout();
131 ElementCount EC = VTy->getElementCount();
132 if (auto *LI = dyn_cast<LoadInst>(Val: I)) {
133 assert(LI->isSimple());
134 Ptr = LI->getPointerOperand();
135 Alignment = LI->getAlign();
136 assert(!Mask && "Unexpected mask on a load");
137 Mask = Builder.getAllOnesMask(NumElts: EC);
138 VL = isa<FixedVectorType>(Val: VTy) ? Builder.CreateElementCount(Ty: XLenTy, EC)
139 : Constant::getAllOnesValue(Ty: XLenTy);
140 return true;
141 }
142 if (auto *SI = dyn_cast<StoreInst>(Val: I)) {
143 assert(SI->isSimple());
144 Ptr = SI->getPointerOperand();
145 Alignment = SI->getAlign();
146 assert(!Mask && "Unexpected mask on a store");
147 Mask = Builder.getAllOnesMask(NumElts: EC);
148 VL = isa<FixedVectorType>(Val: VTy) ? Builder.CreateElementCount(Ty: XLenTy, EC)
149 : Constant::getAllOnesValue(Ty: XLenTy);
150 return true;
151 }
152
153 auto *II = cast<IntrinsicInst>(Val: I);
154 switch (II->getIntrinsicID()) {
155 default:
156 llvm_unreachable("Unsupported intrinsic type");
157 case Intrinsic::vp_load:
158 case Intrinsic::vp_store: {
159 auto *VPLdSt = cast<VPIntrinsic>(Val: I);
160 Ptr = VPLdSt->getMemoryPointerParam();
161 Alignment = VPLdSt->getPointerAlignment().value_or(
162 u: DL.getABITypeAlign(Ty: VTy->getElementType()));
163
164 assert(Mask && "vp.load and vp.store needs a mask!");
165
166 Value *WideEVL = VPLdSt->getVectorLengthParam();
167 // Conservatively check if EVL is a multiple of factor, otherwise some
168 // (trailing) elements might be lost after the transformation.
169 if (!isMultipleOfN(V: WideEVL, DL: I->getDataLayout(), N: Factor))
170 return false;
171
172 auto *FactorC = ConstantInt::get(Ty: WideEVL->getType(), V: Factor);
173 VL = Builder.CreateZExt(V: Builder.CreateExactUDiv(LHS: WideEVL, RHS: FactorC), DestTy: XLenTy);
174 return true;
175 }
176 case Intrinsic::masked_load: {
177 Ptr = II->getOperand(i_nocapture: 0);
178 Alignment = II->getParamAlign(ArgNo: 0).valueOrOne();
179
180 if (!isa<UndefValue>(Val: II->getOperand(i_nocapture: 2)))
181 return false;
182
183 assert(Mask && "masked.load needs a mask!");
184
185 VL = isa<FixedVectorType>(Val: VTy)
186 ? Builder.CreateElementCount(Ty: XLenTy, EC: VTy->getElementCount())
187 : Constant::getAllOnesValue(Ty: XLenTy);
188 return true;
189 }
190 case Intrinsic::masked_store: {
191 Ptr = II->getOperand(i_nocapture: 1);
192 Alignment = II->getParamAlign(ArgNo: 1).valueOrOne();
193
194 assert(Mask && "masked.store needs a mask!");
195
196 VL = isa<FixedVectorType>(Val: VTy)
197 ? Builder.CreateElementCount(Ty: XLenTy, EC: VTy->getElementCount())
198 : Constant::getAllOnesValue(Ty: XLenTy);
199 return true;
200 }
201 }
202}
203
204/// Lower an interleaved load into a vlsegN intrinsic.
205///
206/// E.g. Lower an interleaved load (Factor = 2):
207/// %wide.vec = load <8 x i32>, <8 x i32>* %ptr
208/// %v0 = shuffle %wide.vec, undef, <0, 2, 4, 6> ; Extract even elements
209/// %v1 = shuffle %wide.vec, undef, <1, 3, 5, 7> ; Extract odd elements
210///
211/// Into:
212/// %ld2 = { <4 x i32>, <4 x i32> } call llvm.riscv.seg2.load.v4i32.p0.i64(
213/// %ptr, i64 4)
214/// %vec0 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 0
215/// %vec1 = extractelement { <4 x i32>, <4 x i32> } %ld2, i32 1
216bool RISCVTargetLowering::lowerInterleavedLoad(
217 Instruction *Load, Value *Mask, ArrayRef<ShuffleVectorInst *> Shuffles,
218 ArrayRef<unsigned> Indices, unsigned Factor, const APInt &GapMask) const {
219 assert(Indices.size() == Shuffles.size());
220 assert(GapMask.getBitWidth() == Factor);
221
222 // We only support cases where the skipped fields are the trailing ones.
223 if (!GapMask.isMask())
224 return false;
225 IRBuilder<> Builder(Load);
226
227 unsigned MaskFactor = GapMask.popcount();
228 const DataLayout &DL = Load->getDataLayout();
229 auto *VTy = cast<FixedVectorType>(Val: Shuffles[0]->getType());
230 auto *XLenTy = Builder.getIntNTy(N: Subtarget.getXLen());
231
232 Value *Ptr, *VL;
233 Align Alignment;
234 if (!getMemOperands(Factor: MaskFactor, VTy, XLenTy, I: Load, Ptr, Mask, VL, Alignment))
235 return false;
236
237 Type *PtrTy = Ptr->getType();
238 unsigned AS = PtrTy->getPointerAddressSpace();
239 if (!isLegalInterleavedAccessType(VTy, Factor: MaskFactor, Alignment, AddrSpace: AS, DL))
240 return false;
241
242 Value *SegLoad = nullptr;
243 if (MaskFactor < Factor && MaskFactor != 1) {
244 // Lower to strided segmented load.
245 unsigned ScalarSizeInBytes = DL.getTypeStoreSize(Ty: VTy->getElementType());
246 Value *Stride = ConstantInt::get(Ty: XLenTy, V: Factor * ScalarSizeInBytes);
247 SegLoad = Builder.CreateIntrinsic(ID: FixedVlssegIntrIds[MaskFactor - 2],
248 OverloadTypes: {VTy, PtrTy, XLenTy, XLenTy},
249 Args: {Ptr, Stride, Mask, VL});
250 } else {
251 // Lower to normal segmented load.
252 SegLoad = Builder.CreateIntrinsic(ID: FixedVlsegIntrIds[Factor - 2],
253 OverloadTypes: {VTy, PtrTy, XLenTy}, Args: {Ptr, Mask, VL});
254 }
255
256 for (unsigned i = 0; i < Shuffles.size(); i++) {
257 unsigned FactorIdx = Indices[i];
258 if (FactorIdx >= MaskFactor) {
259 // Replace masked-off factors (that are still extracted) with poison.
260 Shuffles[i]->replaceAllUsesWith(V: PoisonValue::get(T: VTy));
261 } else {
262 Value *SubVec = Builder.CreateExtractValue(Agg: SegLoad, Idxs: FactorIdx);
263 Shuffles[i]->replaceAllUsesWith(V: SubVec);
264 }
265 }
266
267 return true;
268}
269
270/// Lower an interleaved store into a vssegN intrinsic.
271///
272/// E.g. Lower an interleaved store (Factor = 3):
273/// %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
274/// <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
275/// store <12 x i32> %i.vec, <12 x i32>* %ptr
276///
277/// Into:
278/// %sub.v0 = shuffle <8 x i32> %v0, <8 x i32> v1, <0, 1, 2, 3>
279/// %sub.v1 = shuffle <8 x i32> %v0, <8 x i32> v1, <4, 5, 6, 7>
280/// %sub.v2 = shuffle <8 x i32> %v0, <8 x i32> v1, <8, 9, 10, 11>
281/// call void llvm.riscv.seg3.store.v4i32.p0.i64(%sub.v0, %sub.v1, %sub.v2,
282/// %ptr, i32 4)
283///
284/// Note that the new shufflevectors will be removed and we'll only generate one
285/// vsseg3 instruction in CodeGen.
286bool RISCVTargetLowering::lowerInterleavedStore(Instruction *Store,
287 Value *LaneMask,
288 ShuffleVectorInst *SVI,
289 unsigned Factor,
290 const APInt &GapMask) const {
291 assert(GapMask.getBitWidth() == Factor);
292
293 // We only support cases where the skipped fields are the trailing ones.
294 // TODO: Lower to strided store if there is only a single active field.
295 unsigned MaskFactor = GapMask.popcount();
296 if (MaskFactor < 2 || !GapMask.isMask())
297 return false;
298
299 IRBuilder<> Builder(Store);
300 const DataLayout &DL = Store->getDataLayout();
301 auto Mask = SVI->getShuffleMask();
302 auto *ShuffleVTy = cast<FixedVectorType>(Val: SVI->getType());
303 // Given SVI : <n*factor x ty>, then VTy : <n x ty>
304 auto *VTy = FixedVectorType::get(ElementType: ShuffleVTy->getElementType(),
305 NumElts: ShuffleVTy->getNumElements() / Factor);
306 auto *XLenTy = Builder.getIntNTy(N: Subtarget.getXLen());
307
308 Value *Ptr, *VL;
309 Align Alignment;
310 if (!getMemOperands(Factor: MaskFactor, VTy, XLenTy, I: Store, Ptr, Mask&: LaneMask, VL,
311 Alignment))
312 return false;
313
314 Type *PtrTy = Ptr->getType();
315 unsigned AS = PtrTy->getPointerAddressSpace();
316 if (!isLegalInterleavedAccessType(VTy, Factor: MaskFactor, Alignment, AddrSpace: AS, DL))
317 return false;
318
319 Function *SegStoreFunc;
320 if (MaskFactor < Factor)
321 // Strided segmented store.
322 SegStoreFunc = Intrinsic::getOrInsertDeclaration(
323 M: Store->getModule(), id: FixedVsssegIntrIds[MaskFactor - 2],
324 OverloadTys: {VTy, PtrTy, XLenTy, XLenTy});
325 else
326 // Normal segmented store.
327 SegStoreFunc = Intrinsic::getOrInsertDeclaration(
328 M: Store->getModule(), id: FixedVssegIntrIds[Factor - 2],
329 OverloadTys: {VTy, PtrTy, XLenTy});
330
331 SmallVector<Value *, 10> Ops;
332 SmallVector<int, 16> NewShuffleMask;
333
334 for (unsigned i = 0; i < MaskFactor; i++) {
335 // Collect shuffle mask for this lane.
336 for (unsigned j = 0; j < VTy->getNumElements(); j++)
337 NewShuffleMask.push_back(Elt: Mask[i + Factor * j]);
338
339 Value *Shuffle = Builder.CreateShuffleVector(
340 V1: SVI->getOperand(i_nocapture: 0), V2: SVI->getOperand(i_nocapture: 1), Mask: NewShuffleMask);
341 Ops.push_back(Elt: Shuffle);
342
343 NewShuffleMask.clear();
344 }
345 Ops.push_back(Elt: Ptr);
346 if (MaskFactor < Factor) {
347 // Insert the stride argument.
348 unsigned ScalarSizeInBytes = DL.getTypeStoreSize(Ty: VTy->getElementType());
349 Ops.push_back(Elt: ConstantInt::get(Ty: XLenTy, V: Factor * ScalarSizeInBytes));
350 }
351 Ops.append(IL: {LaneMask, VL});
352 Builder.CreateCall(Callee: SegStoreFunc, Args: Ops);
353
354 return true;
355}
356
357bool RISCVTargetLowering::lowerDeinterleaveIntrinsicToLoad(
358 Instruction *Load, Value *Mask, IntrinsicInst *DI,
359 const APInt &GapMask) const {
360 const unsigned Factor = getDeinterleaveIntrinsicFactor(ID: DI->getIntrinsicID());
361 assert(GapMask.getBitWidth() == Factor);
362 if (Factor > 8)
363 return false;
364
365 // We only support cases where the skipped fields are the trailing ones.
366 if (!GapMask.isMask())
367 return false;
368 IRBuilder<> Builder(Load);
369
370 VectorType *ResVTy = getDeinterleavedVectorType(DI);
371
372 unsigned MaskFactor = GapMask.getActiveBits();
373 // For MaskFactor of 1, we still want to lower it with segmented load
374 // (of the original Factor), because the sole field extraction will eventually
375 // turn it into a strided load.
376 bool UseStridedSeg = MaskFactor < Factor && MaskFactor > 1;
377 const DataLayout &DL = Load->getDataLayout();
378 auto *XLenTy = Builder.getIntNTy(N: Subtarget.getXLen());
379
380 Value *Ptr, *VL;
381 Align Alignment;
382 if (!getMemOperands(Factor, VTy: ResVTy, XLenTy, I: Load, Ptr, Mask, VL, Alignment))
383 return false;
384
385 Type *PtrTy = Ptr->getType();
386 unsigned AS = PtrTy->getPointerAddressSpace();
387 if (!isLegalInterleavedAccessType(VTy: ResVTy, Factor, Alignment, AddrSpace: AS, DL))
388 return false;
389
390 unsigned ElementSizeInBytes = DL.getTypeStoreSize(Ty: ResVTy->getElementType());
391 Value *Return;
392 if (isa<FixedVectorType>(Val: ResVTy)) {
393 Value *SegLoad;
394 if (UseStridedSeg) {
395 // Lower to strided segmented load.
396 Value *Stride = ConstantInt::get(Ty: XLenTy, V: Factor * ElementSizeInBytes);
397 SegLoad = Builder.CreateIntrinsic(ID: FixedVlssegIntrIds[MaskFactor - 2],
398 OverloadTypes: {ResVTy, PtrTy, XLenTy, XLenTy},
399 Args: {Ptr, Stride, Mask, VL});
400 } else {
401 SegLoad =
402 Builder.CreateIntrinsic(ID: FixedVlsegIntrIds[Factor - 2],
403 OverloadTypes: {ResVTy, PtrTy, XLenTy}, Args: {Ptr, Mask, VL});
404 }
405
406 if (MaskFactor != Factor) {
407 // Replace masked-off factors with poisons.
408 SmallVector<Type *, 8> AggrTypes{Factor, ResVTy};
409 Return = PoisonValue::get(T: StructType::get(Context&: Load->getContext(), Elements: AggrTypes));
410 for (unsigned I = 0; I < MaskFactor; ++I) {
411 Value *SubVec = Builder.CreateExtractValue(Agg: SegLoad, Idxs: I);
412 Return = Builder.CreateInsertValue(Agg: Return, Val: SubVec, Idxs: I);
413 }
414 } else {
415 Return = SegLoad;
416 }
417 } else {
418 unsigned SEW = DL.getTypeSizeInBits(Ty: ResVTy->getElementType());
419 unsigned NumElts = ResVTy->getElementCount().getKnownMinValue();
420 Type *VecTupTy = TargetExtType::get(
421 Context&: Load->getContext(), Name: "riscv.vector.tuple",
422 Types: ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: NumElts * SEW / 8),
423 Ints: UseStridedSeg ? MaskFactor : Factor);
424 Function *SegLoadFunc;
425 if (UseStridedSeg) {
426 // Lower to strided segmented load.
427 SegLoadFunc = Intrinsic::getOrInsertDeclaration(
428 M: Load->getModule(), id: ScalableVlssegIntrIds[MaskFactor - 2],
429 OverloadTys: {VecTupTy, PtrTy, XLenTy, Mask->getType()});
430 } else {
431 SegLoadFunc = Intrinsic::getOrInsertDeclaration(
432 M: Load->getModule(), id: ScalableVlsegIntrIds[Factor - 2],
433 OverloadTys: {VecTupTy, PtrTy, Mask->getType(), VL->getType()});
434 }
435
436 SmallVector<Value *, 8> Operands = {
437 PoisonValue::get(T: VecTupTy),
438 Ptr,
439 Mask,
440 VL,
441 ConstantInt::get(Ty: XLenTy,
442 V: RISCVVType::TAIL_AGNOSTIC | RISCVVType::MASK_AGNOSTIC),
443 ConstantInt::get(Ty: XLenTy, V: Log2_64(Value: SEW))};
444 if (UseStridedSeg) {
445 Value *Stride = ConstantInt::get(Ty: XLenTy, V: Factor * ElementSizeInBytes);
446 Operands.insert(I: std::next(x: Operands.begin(), n: 2), Elt: Stride);
447 }
448
449 CallInst *Vlseg = Builder.CreateCall(Callee: SegLoadFunc, Args: Operands);
450
451 SmallVector<Type *, 8> AggrTypes{Factor, ResVTy};
452 Return = PoisonValue::get(T: StructType::get(Context&: Load->getContext(), Elements: AggrTypes));
453 for (unsigned i = 0; i < MaskFactor; ++i) {
454 Value *VecExtract = Builder.CreateIntrinsic(
455 ID: Intrinsic::riscv_tuple_extract, OverloadTypes: {ResVTy, VecTupTy},
456 Args: {Vlseg, Builder.getInt32(C: i)});
457 Return = Builder.CreateInsertValue(Agg: Return, Val: VecExtract, Idxs: i);
458 }
459 }
460
461 DI->replaceAllUsesWith(V: Return);
462 return true;
463}
464
465bool RISCVTargetLowering::lowerInterleaveIntrinsicToStore(
466 Instruction *Store, Value *Mask, ArrayRef<Value *> InterleaveValues) const {
467 unsigned Factor = InterleaveValues.size();
468 if (Factor > 8)
469 return false;
470
471 IRBuilder<> Builder(Store);
472
473 auto *InVTy = cast<VectorType>(Val: InterleaveValues[0]->getType());
474 const DataLayout &DL = Store->getDataLayout();
475 Type *XLenTy = Builder.getIntNTy(N: Subtarget.getXLen());
476
477 Value *Ptr, *VL;
478 Align Alignment;
479 if (!getMemOperands(Factor, VTy: InVTy, XLenTy, I: Store, Ptr, Mask, VL, Alignment))
480 return false;
481 Type *PtrTy = Ptr->getType();
482 unsigned AS = Ptr->getType()->getPointerAddressSpace();
483 if (!isLegalInterleavedAccessType(VTy: InVTy, Factor, Alignment, AddrSpace: AS, DL))
484 return false;
485
486 if (isa<FixedVectorType>(Val: InVTy)) {
487 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
488 M: Store->getModule(), id: FixedVssegIntrIds[Factor - 2],
489 OverloadTys: {InVTy, PtrTy, XLenTy});
490 SmallVector<Value *, 10> Ops(InterleaveValues);
491 Ops.append(IL: {Ptr, Mask, VL});
492 Builder.CreateCall(Callee: VssegNFunc, Args: Ops);
493 return true;
494 }
495 unsigned SEW = DL.getTypeSizeInBits(Ty: InVTy->getElementType());
496 unsigned NumElts = InVTy->getElementCount().getKnownMinValue();
497 Type *VecTupTy = TargetExtType::get(
498 Context&: Store->getContext(), Name: "riscv.vector.tuple",
499 Types: ScalableVectorType::get(ElementType: Builder.getInt8Ty(), MinNumElts: NumElts * SEW / 8), Ints: Factor);
500
501 Value *StoredVal = PoisonValue::get(T: VecTupTy);
502 for (unsigned i = 0; i < Factor; ++i)
503 StoredVal = Builder.CreateIntrinsic(
504 ID: Intrinsic::riscv_tuple_insert, OverloadTypes: {VecTupTy, InVTy},
505 Args: {StoredVal, InterleaveValues[i], Builder.getInt32(C: i)});
506
507 Function *VssegNFunc = Intrinsic::getOrInsertDeclaration(
508 M: Store->getModule(), id: ScalableVssegIntrIds[Factor - 2],
509 OverloadTys: {VecTupTy, PtrTy, Mask->getType(), VL->getType()});
510
511 Value *Operands[] = {StoredVal, Ptr, Mask, VL,
512 ConstantInt::get(Ty: XLenTy, V: Log2_64(Value: SEW))};
513 Builder.CreateCall(Callee: VssegNFunc, Args: Operands);
514 return true;
515}
516