//===- Scalarizer.cpp - Scalarize vector operations -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass converts vector operations into scalar operations (or, optionally,
// operations on smaller vector widths), in order to expose optimization
// opportunities on the individual scalar operations.
// It is mainly intended for targets that do not have vector units, but it
// may also be useful for revectorizing code to different vector widths.
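//
// For example (a conceptual sketch, with full scalarization), a vector add
//   %r = add <4 x i32> %a, %b
// is rewritten as four scalar adds on the extracted lanes; the vector result
// is reassembled only if some user still needs it.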
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/Scalarizer.h"
#include "llvm/ADT/PostOrderIterator.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Twine.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Support/Casting.h"
#include "llvm/Transforms/Utils/Local.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <map>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "scalarizer"

static BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) {
  BasicBlock *BB = Itr->getParent();
  if (isa<PHINode>(Itr))
    Itr = BB->getFirstInsertionPt();
  if (Itr != BB->end())
    Itr = skipDebugIntrinsics(Itr);
  return Itr;
}

// Used to store the scattered form of a vector.
using ValueVector = SmallVector<Value *, 8>;

// Used to map a vector Value and associated type to its scattered form.
// The associated type is only non-null for pointer values that are "scattered"
// when used as pointer operands to load or store.
//
// We use std::map because we want iterators to persist across insertion and
// because the values are relatively large.
using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;

// Lists Instructions that have been replaced with scalar implementations,
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;

namespace {

struct VectorSplit {
  // The type of the vector.
  FixedVectorType *VecTy = nullptr;

  // The number of elements packed in a fragment (other than the remainder).
  unsigned NumPacked = 0;

  // The number of fragments (scalars or smaller vectors) into which the vector
  // shall be split.
  unsigned NumFragments = 0;

  // The type of each complete fragment.
  Type *SplitTy = nullptr;

  // The type of the remainder (last) fragment; null if all fragments are
  // complete.
  Type *RemainderTy = nullptr;

  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
};
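
// For example (assuming the ScalarizeMinBits option is 32), a <5 x i16>
// vector is split into NumPacked = 2, NumFragments = 3, SplitTy = <2 x i16>,
// and RemainderTy = i16, i.e. the fragments {<2 x i16>, <2 x i16>, i16}.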

// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  Scatterer() = default;

  // Scatter V into the fragments described by VS. If new instructions are
  // needed, insert them before BBI in BB. If CachePtr is nonnull, use it to
  // cache the results.
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return VS.NumFragments; }

private:
  BasicBlock *BB;
  BasicBlock::iterator BBI;
  Value *V;
  VectorSplit VS;
  bool IsPointer;
  ValueVector *CachePtr;
  ValueVector Tmp;
};

// FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
// called Name that compares X and Y in the same way as FCI.
struct FCmpSplitter {
  FCmpSplitter(FCmpInst &fci) : FCI(fci) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
  }

  FCmpInst &FCI;
};

// ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
// called Name that compares X and Y in the same way as ICI.
struct ICmpSplitter {
  ICmpSplitter(ICmpInst &ici) : ICI(ici) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
  }

  ICmpInst &ICI;
};

// UnarySplitter(UO)(Builder, X, Name) uses Builder to create
// a unary operator like UO called Name with operand X.
struct UnarySplitter {
  UnarySplitter(UnaryOperator &uo) : UO(uo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const {
    return Builder.CreateUnOp(UO.getOpcode(), Op, Name);
  }

  UnaryOperator &UO;
};

// BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create
// a binary operator like BO called Name with operands X and Y.
struct BinarySplitter {
  BinarySplitter(BinaryOperator &bo) : BO(bo) {}

  Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
                    const Twine &Name) const {
    return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
  }

  BinaryOperator &BO;
};

// Information about a load or store that we're scalarizing.
struct VectorLayout {
  VectorLayout() = default;

  // Return the alignment of fragment Frag.
  Align getFragmentAlign(unsigned Frag) {
    return commonAlignment(VecAlign, Frag * SplitSize);
  }

  // The split of the underlying vector type.
  VectorSplit VS;

  // The alignment of the vector.
  Align VecAlign;

  // The size of each (non-remainder) fragment in bytes.
  uint64_t SplitSize = 0;
};
} // namespace

static bool isStructOfMatchingFixedVectors(Type *Ty) {
  if (!isa<StructType>(Ty))
    return false;
  unsigned StructSize = Ty->getNumContainedTypes();
  if (StructSize < 1)
    return false;
  FixedVectorType *VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(0));
  if (!VecTy)
    return false;
  unsigned VecSize = VecTy->getNumElements();
  for (unsigned I = 1; I < StructSize; I++) {
    VecTy = dyn_cast<FixedVectorType>(Ty->getContainedType(I));
    if (!VecTy || VecSize != VecTy->getNumElements())
      return false;
  }
  return true;
}

/// Concatenate the given fragments into a single vector value of the type
/// described by @p VS.
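///
/// For example (continuing the <5 x i16> split sketched above), the fragments
/// {<2 x i16>, <2 x i16>, i16} are recombined by widening each vector
/// fragment with one shufflevector, merging it into the accumulated result
/// with a second shufflevector, and adding the scalar remainder with an
/// insertelement.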
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    ExtendMask.resize(NumElements, -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    InsertMask.resize(NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      Res = Builder.CreateInsertElement(Res, Fragment, I * VS.NumPacked,
                                        Name + ".upto" + Twine(I));
    } else {
      if (NumPacked < VS.NumPacked) {
        // The remainder fragment packs fewer elements than a full fragment,
        // so rebuild ExtendMask to match its size.
        ExtendMask.truncate(NumPacked);
        ExtendMask.resize(NumElements, -1);
      }

      Fragment = Builder.CreateShuffleVector(
          Fragment, PoisonValue::get(Fragment->getType()), ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(Res, Fragment, InsertMask,
                                          Name + ".upto" + Twine(I));
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}

namespace {
class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> {
public:
  ScalarizerVisitor(DominatorTree *DT, const TargetTransformInfo *TTI,
                    ScalarizerPassOptions Options)
      : DT(DT), TTI(TTI),
        ScalarizeVariableInsertExtract(Options.ScalarizeVariableInsertExtract),
        ScalarizeLoadStore(Options.ScalarizeLoadStore),
        ScalarizeMinBits(Options.ScalarizeMinBits) {}

  bool visit(Function &F);

  // InstVisitor methods. They return true if the instruction was scalarized,
  // false if nothing changed.
  bool visitInstruction(Instruction &I) { return false; }
  bool visitSelectInst(SelectInst &SI);
  bool visitICmpInst(ICmpInst &ICI);
  bool visitFCmpInst(FCmpInst &FCI);
  bool visitUnaryOperator(UnaryOperator &UO);
  bool visitBinaryOperator(BinaryOperator &BO);
  bool visitGetElementPtrInst(GetElementPtrInst &GEPI);
  bool visitCastInst(CastInst &CI);
  bool visitBitCastInst(BitCastInst &BCI);
  bool visitInsertElementInst(InsertElementInst &IEI);
  bool visitExtractElementInst(ExtractElementInst &EEI);
  bool visitExtractValueInst(ExtractValueInst &EVI);
  bool visitShuffleVectorInst(ShuffleVectorInst &SVI);
  bool visitPHINode(PHINode &PHI);
  bool visitLoadInst(LoadInst &LI);
  bool visitStoreInst(StoreInst &SI);
  bool visitCallInst(CallInst &ICI);
  bool visitFreezeInst(FreezeInst &FI);

private:
  Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS);
  void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS);
  void replaceUses(Instruction *Op, Value *CV);
  bool canTransferMetadata(unsigned Kind);
  void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV);
  std::optional<VectorSplit> getVectorSplit(Type *Ty);
  std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment,
                                              const DataLayout &DL);
  bool finish();

  template<typename T> bool splitUnary(Instruction &, const T &);
  template<typename T> bool splitBinary(Instruction &, const T &);

  bool splitCall(CallInst &CI);

  ScatterMap Scattered;
  GatherList Gathered;
  bool Scalarized;

  SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs;

  DominatorTree *DT;
  const TargetTransformInfo *TTI;

  const bool ScalarizeVariableInsertExtract;
  const bool ScalarizeLoadStore;
  const unsigned ScalarizeMinBits;
};

class ScalarizerLegacyPass : public FunctionPass {
public:
  static char ID;
  ScalarizerPassOptions Options;
  ScalarizerLegacyPass() : FunctionPass(ID), Options() {}
  ScalarizerLegacyPass(const ScalarizerPassOptions &Options);
  bool runOnFunction(Function &F) override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
};

} // end anonymous namespace

ScalarizerLegacyPass::ScalarizerLegacyPass(const ScalarizerPassOptions &Options)
    : FunctionPass(ID), Options(Options) {}

void ScalarizerLegacyPass::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
}

char ScalarizerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(ScalarizerLegacyPass, "scalarizer",
                      "Scalarize vector operations", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(ScalarizerLegacyPass, "scalarizer",
                    "Scalarize vector operations", false, false)

Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
                     const VectorSplit &VS, ValueVector *cachePtr)
    : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) {
  IsPointer = V->getType()->isPointerTy();
  if (!CachePtr) {
    Tmp.resize(VS.NumFragments, nullptr);
  } else {
    assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() ||
            IsPointer) &&
           "Inconsistent vector sizes");
    if (VS.NumFragments > CachePtr->size())
      CachePtr->resize(VS.NumFragments, nullptr);
  }
}

// Return fragment Frag, creating a new Value for it if necessary.
Value *Scatterer::operator[](unsigned Frag) {
  ValueVector &CV = CachePtr ? *CachePtr : Tmp;
  // Try to reuse a previous value.
  if (CV[Frag])
    return CV[Frag];
  IRBuilder<> Builder(BB, BBI);
  if (IsPointer) {
    if (Frag == 0)
      CV[Frag] = V;
    else
      CV[Frag] = Builder.CreateConstGEP1_32(VS.SplitTy, V, Frag,
                                            V->getName() + ".i" + Twine(Frag));
    return CV[Frag];
  }

  Type *FragmentTy = VS.getFragmentType(Frag);

  if (auto *VecTy = dyn_cast<FixedVectorType>(FragmentTy)) {
    SmallVector<int> Mask;
    for (unsigned J = 0; J < VecTy->getNumElements(); ++J)
      Mask.push_back(Frag * VS.NumPacked + J);
    CV[Frag] =
        Builder.CreateShuffleVector(V, PoisonValue::get(V->getType()), Mask,
                                    V->getName() + ".i" + Twine(Frag));
  } else {
    // Search through a chain of InsertElementInsts looking for element Frag.
    // Record other elements in the cache. The new V is still suitable
    // for all uncached indices.
    while (true) {
      InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
      if (!Insert)
        break;
      ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
      if (!Idx)
        break;
      unsigned J = Idx->getZExtValue();
      V = Insert->getOperand(0);
      if (Frag * VS.NumPacked == J) {
        CV[Frag] = Insert->getOperand(1);
        return CV[Frag];
      }

      if (VS.NumPacked == 1 && !CV[J]) {
        // Only cache the first entry we find for each index we're not actively
        // searching for. This prevents us from going too far up the chain and
        // caching incorrect entries.
        CV[J] = Insert->getOperand(1);
      }
    }
    CV[Frag] = Builder.CreateExtractElement(V, Frag * VS.NumPacked,
                                            V->getName() + ".i" + Twine(Frag));
  }

  return CV[Frag];
}

bool ScalarizerLegacyPass::runOnFunction(Function &F) {
  if (skipFunction(F))
    return false;

  DominatorTree *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  const TargetTransformInfo *TTI =
      &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  ScalarizerVisitor Impl(DT, TTI, Options);
  return Impl.visit(F);
}

FunctionPass *llvm::createScalarizerPass(const ScalarizerPassOptions &Options) {
  return new ScalarizerLegacyPass(Options);
}

bool ScalarizerVisitor::visit(Function &F) {
  assert(Gathered.empty() && Scattered.empty());

  Scalarized = false;

  // To ensure we replace gathered components correctly we need to do an
  // ordered traversal of the basic blocks in the function.
  ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock());
  for (BasicBlock *BB : RPOT) {
    for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) {
      Instruction *I = &*II;
      bool Done = InstVisitor::visit(I);
      ++II;
      if (Done && I->getType()->isVoidTy()) {
        I->eraseFromParent();
        Scalarized = true;
      }
    }
  }
  return finish();
}

// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
  }
  if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
    // nodes in predecessors. If those predecessors are unreachable from entry,
    // then the IR in those blocks could have unexpected properties resulting
    // in infinite loops in Scatterer::operator[]. By simply treating values
    // originating from instructions in unreachable blocks as undef we do not
    // need to analyse them further.
    if (!DT->isReachableFromEntry(VOp->getParent()))
      return Scatterer(Point->getParent(), Point->getIterator(),
                       PoisonValue::get(V->getType()), VS);
    // Put the scattered form of an instruction directly after the
    // instruction, skipping over PHI nodes and debug intrinsics.
    BasicBlock *BB = VOp->getParent();
    return Scatterer(
        BB, skipPastPhiNodesAndDbg(std::next(BasicBlock::iterator(VOp))), V, VS,
        &Scattered[{V, VS.SplitTy}]);
  }
  // In the fallback case, just put the scattered form before Point and
  // keep the result local to Point.
  return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}

// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      if (V == nullptr || SV[I] == CV[I])
        continue;

      Instruction *Old = cast<Instruction>(V);
      if (isa<Instruction>(CV[I]))
        CV[I]->takeName(Old);
      Old->replaceAllUsesWith(CV[I]);
      PotentiallyDeadInstrs.emplace_back(Old);
    }
  }
  SV = CV;
  Gathered.push_back(GatherList::value_type(Op, &SV));
}

// Replace all uses of Op with CV and record Op as a potentially dead
// instruction.
void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) {
  if (CV != Op) {
    Op->replaceAllUsesWith(CV);
    PotentiallyDeadInstrs.emplace_back(Op);
    Scalarized = true;
  }
}

// Return true if it is safe to transfer the given metadata tag from
// vector to scalar instructions.
bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) {
  return (Tag == LLVMContext::MD_tbaa
          || Tag == LLVMContext::MD_fpmath
          || Tag == LLVMContext::MD_tbaa_struct
          || Tag == LLVMContext::MD_invariant_load
          || Tag == LLVMContext::MD_alias_scope
          || Tag == LLVMContext::MD_noalias
          || Tag == LLVMContext::MD_mem_parallel_loop_access
          || Tag == LLVMContext::MD_access_group);
}

// Transfer metadata from Op to the instructions in CV if it is known
// to be safe to do so.
void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op,
                                                   const ValueVector &CV) {
  SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
  Op->getAllMetadataOtherThanDebugLoc(MDs);
  for (Value *V : CV) {
    if (Instruction *New = dyn_cast<Instruction>(V)) {
      for (const auto &MD : MDs)
        if (canTransferMetadata(MD.first))
          New->setMetadata(MD.first, MD.second);
      New->copyIRFlags(Op);
      if (Op->getDebugLoc() && !New->getDebugLoc())
        New->setDebugLoc(Op->getDebugLoc());
    }
  }
}

// Determine how Ty is split, if at all.
std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) {
  VectorSplit Split;
  Split.VecTy = dyn_cast<FixedVectorType>(Ty);
  if (!Split.VecTy)
    return {};

  unsigned NumElems = Split.VecTy->getNumElements();
  Type *ElemTy = Split.VecTy->getElementType();

  if (NumElems == 1 || ElemTy->isPointerTy() ||
      2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) {
    Split.NumPacked = 1;
    Split.NumFragments = NumElems;
    Split.SplitTy = ElemTy;
  } else {
    Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits();
    if (Split.NumPacked >= NumElems)
      return {};

    Split.NumFragments = divideCeil(NumElems, Split.NumPacked);
    Split.SplitTy = FixedVectorType::get(ElemTy, Split.NumPacked);

    unsigned RemainderElems = NumElems % Split.NumPacked;
    if (RemainderElems > 1)
      Split.RemainderTy = FixedVectorType::get(ElemTy, RemainderElems);
    else if (RemainderElems == 1)
      Split.RemainderTy = ElemTy;
  }

  return Split;
}

// Return the layout of the vector type Ty for a memory access with the given
// Alignment, or std::nullopt if Ty is not being split or its fragments do not
// have a whole-byte store size.
std::optional<VectorLayout>
ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment,
                                   const DataLayout &DL) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty);
  if (!VS)
    return {};

  VectorLayout Layout;
  Layout.VS = *VS;
  // Check that we're dealing with full-byte fragments.
  if (!DL.typeSizeEqualsStoreSize(VS->SplitTy) ||
      (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(VS->RemainderTy)))
    return {};
  Layout.VecAlign = Alignment;
  Layout.SplitSize = DL.getTypeStoreSize(VS->SplitTy);
  return Layout;
}

// Scalarize one-operand instruction I, using Split(Builder, X, Name)
// to create an instruction like I with operand X and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer Op = scatter(&I, I.getOperand(0), *OpVS);
  assert(Op.size() == VS->NumFragments && "Mismatched unary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag)
    Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag));
  gather(&I, Res, *VS);
  return true;
}

// Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
// to create an instruction like I with operands X and Y and name Name.
template<typename Splitter>
bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) {
  std::optional<VectorSplit> VS = getVectorSplit(I.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> OpVS;
  if (I.getOperand(0)->getType() == I.getType()) {
    OpVS = VS;
  } else {
    OpVS = getVectorSplit(I.getOperand(0)->getType());
    if (!OpVS || VS->NumPacked != OpVS->NumPacked)
      return false;
  }

  IRBuilder<> Builder(&I);
  Scatterer VOp0 = scatter(&I, I.getOperand(0), *OpVS);
  Scatterer VOp1 = scatter(&I, I.getOperand(1), *OpVS);
  assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation");
  assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation");
  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) {
    Value *Op0 = VOp0[Frag];
    Value *Op1 = VOp1[Frag];
    Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag));
  }
  gather(&I, Res, *VS);
  return true;
}

/// If CI is a call to a vector-typed intrinsic function, split it into one
/// scalar (or narrower-vector) call per fragment, if the intrinsic allows it.
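///
/// For example (with full scalarization), a call such as
///   %r = call <4 x float> @llvm.sqrt.v4f32(<4 x float> %x)
/// becomes four calls to @llvm.sqrt.f32, one per extracted lane.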
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  Type *CallType = CI.getType();
  bool AreAllVectorsOfMatchingSize = isStructOfMatchingFixedVectors(CallType);
  std::optional<VectorSplit> VS;
  if (AreAllVectorsOfMatchingSize)
    VS = getVectorSplit(CallType->getContainedType(0));
  else
    VS = getVectorSplit(CallType);
  if (!VS)
    return false;

  Function *F = CI.getCalledFunction();
  if (!F)
    return false;

  Intrinsic::ID ID = F->getIntrinsicID();

  if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID, TTI))
    return false;

  unsigned NumArgs = CI.arg_size();

  ValueVector ScalarOperands(NumArgs);
  SmallVector<Scatterer, 8> Scattered(NumArgs);
  SmallVector<int> OverloadIdx(NumArgs, -1);

  SmallVector<llvm::Type *, 3> Tys;
  // Add return type if intrinsic is overloaded on it.
  if (isVectorIntrinsicWithOverloadTypeAtArg(ID, -1, TTI))
    Tys.push_back(VS->SplitTy);

  if (AreAllVectorsOfMatchingSize) {
    for (unsigned I = 1; I < CallType->getNumContainedTypes(); I++) {
      std::optional<VectorSplit> CurrVS =
          getVectorSplit(cast<FixedVectorType>(CallType->getContainedType(I)));
      // It is possible for NumPacked >= NumElems, in which case getVectorSplit
      // returns no VectorSplit and we bail out of handling this call. The
      // secondary bailout case is a NumPacked mismatch, which can happen when
      // ScalarizeMinBits is not set to the default; for certain
      // ScalarizeMinBits values, intrinsics like frexp are only scalarized
      // when the struct elements have the same bit width.
      if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
        return false;
      if (isVectorIntrinsicWithStructReturnOverloadAtField(ID, I, TTI))
        Tys.push_back(CurrVS->SplitTy);
    }
  }
  // Assumes that any vector type has the same number of elements as the return
  // vector type, which is true for all current intrinsics.
  for (unsigned I = 0; I != NumArgs; ++I) {
    Value *OpI = CI.getOperand(I);
    if ([[maybe_unused]] auto *OpVecTy =
            dyn_cast<FixedVectorType>(OpI->getType())) {
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // The natural split of the operand doesn't match the result. This
        // could happen if the vector element types differ and the
        // ScalarizeMinBits option is used.
        //
        // We could in principle handle this case as well, at the cost of
        // complicating the scattering machinery to support multiple scattering
        // granularities for a single value.
        return false;
      }

      Scattered[I] = scatter(&CI, OpI, *OpVS);
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I, TTI)) {
        OverloadIdx[I] = Tys.size();
        Tys.push_back(OpVS->SplitTy);
      }
    } else {
      ScalarOperands[I] = OpI;
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, I, TTI))
        Tys.push_back(OpI->getType());
    }
  }

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  Function *NewIntrin =
      Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);
  IRBuilder<> Builder(&CI);

  // Perform actual scalarization, taking care to preserve any scalar operands.
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

    if (IsRemainder)
      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
      if (isVectorIntrinsicWithScalarOpAtArg(ID, J, TTI)) {
        ScalarCallOps.push_back(ScalarOperands[J]);
      } else {
        ScalarCallOps.push_back(Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
      }
    }

    if (IsRemainder)
      NewIntrin = Intrinsic::getOrInsertDeclaration(F->getParent(), ID, Tys);

    Res[I] = Builder.CreateCall(NewIntrin, ScalarCallOps,
                                CI.getName() + ".i" + Twine(I));
  }

  gather(&CI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(SI.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> CondVS;
  if (isa<FixedVectorType>(SI.getCondition()->getType())) {
    CondVS = getVectorSplit(SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
      // This happens when ScalarizeMinBits is used.
      return false;
    }
  }

  IRBuilder<> Builder(&SI);
  Scatterer VOp1 = scatter(&SI, SI.getOperand(1), *VS);
  Scatterer VOp2 = scatter(&SI, SI.getOperand(2), *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select");
  assert(VOp2.size() == VS->NumFragments && "Mismatched select");
  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (CondVS) {
    Scatterer VOp0 = scatter(&SI, SI.getOperand(0), *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select");
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op0 = VOp0[I];
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  } else {
    Value *Op0 = SI.getOperand(0);
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(Op0, Op1, Op2,
                                    SI.getName() + ".i" + Twine(I));
    }
  }
  gather(&SI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) {
  return splitBinary(ICI, ICmpSplitter(ICI));
}

bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) {
  return splitBinary(FCI, FCmpSplitter(FCI));
}

bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) {
  return splitUnary(UO, UnarySplitter(UO));
}

bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) {
  return splitBinary(BO, BinarySplitter(BO));
}

bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(GEPI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumIndices = GEPI.getNumIndices();

  // The base pointer and indices might be scalar even if it's a vector GEP.
  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
  SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
    if (auto *VecTy =
            dyn_cast<FixedVectorType>(GEPI.getOperand(I)->getType())) {
      std::optional<VectorSplit> OpVS = getVectorSplit(VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // This can happen when ScalarizeMinBits is used.
        return false;
      }
      ScatterOps[I] = scatter(&GEPI, GEPI.getOperand(I), *OpVS);
    } else {
      ScalarOps[I] = GEPI.getOperand(I);
    }
  }

  ValueVector Res;
  Res.resize(VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    SmallVector<Value *, 8> SplitOps;
    SplitOps.resize(1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
      if (ScalarOps[J])
        SplitOps[J] = ScalarOps[J];
      else
        SplitOps[J] = ScatterOps[J][I];
    }
    Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), SplitOps[0],
                               ArrayRef(SplitOps).drop_front(),
                               GEPI.getName() + ".i" + Twine(I));
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(&GEPI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitCastInst(CastInst &CI) {
  std::optional<VectorSplit> DestVS = getVectorSplit(CI.getDestTy());
  if (!DestVS)
    return false;

  std::optional<VectorSplit> SrcVS = getVectorSplit(CI.getSrcTy());
  if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked)
    return false;

  IRBuilder<> Builder(&CI);
  Scatterer Op0 = scatter(&CI, CI.getOperand(0), *SrcVS);
  assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast");
  ValueVector Res;
  Res.resize(DestVS->NumFragments);
  for (unsigned I = 0; I < DestVS->NumFragments; ++I)
    Res[I] =
        Builder.CreateCast(CI.getOpcode(), Op0[I], DestVS->getFragmentType(I),
                           CI.getName() + ".i" + Twine(I));
  gather(&CI, Res, *DestVS);
  return true;
}

bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
    return false;

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  // Vectors of pointers are always fully scalarized.
  assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));

  IRBuilder<> Builder(&BCI);
  Scatterer Op0 = scatter(&BCI, BCI.getOperand(0), *SrcVS);
  ValueVector Res;
  Res.resize(DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

  if (isPointerTy || DstSplitBits == SrcSplitBits) {
    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(Op0[I], DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else if (SrcSplitBits % DstSplitBits == 0) {
    // Convert each source fragment to the same-sized destination vector and
    // then scatter the result to the destination.
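    // For example (a sketch; the exact fragment types depend on the options),
    // when fully scalarizing a bitcast from <2 x i32> to <4 x i16>, each i32
    // source fragment is bitcast to <2 x i16> and then scattered into two i16
    // destination fragments.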
    VectorSplit MidVS;
    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;
    MidVS.VecTy = FixedVectorType::get(DstVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    unsigned ResI = 0;
    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
      Value *V = Op0[I];

      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      Instruction *VI;
      while ((VI = dyn_cast<Instruction>(V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(0);

      V = Builder.CreateBitCast(V, MidVS.VecTy, V->getName() + ".cast");

      Scatterer Mid = scatter(&BCI, V, MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];
    }
  } else if (DstSplitBits % SrcSplitBits == 0) {
    // Gather enough source fragments to make up a destination fragment and
    // then convert to the destination type.
    VectorSplit MidVS;
    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;
    MidVS.VecTy = FixedVectorType::get(SrcVS->VecTy->getElementType(),
                                       MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    unsigned SrcI = 0;
    SmallVector<Value *, 8> ConcatOps;
    ConcatOps.resize(MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];
      Value *V = concatenate(Builder, ConcatOps, MidVS,
                             BCI.getName() + ".i" + Twine(I));
      Res[I] = Builder.CreateBitCast(V, DstVS->getFragmentType(I),
                                     BCI.getName() + ".i" + Twine(I));
    }
  } else {
    return false;
  }

  gather(&BCI, Res, *DstVS);
  return true;
}

bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  std::optional<VectorSplit> VS = getVectorSplit(IEI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&IEI);
  Scatterer Op0 = scatter(&IEI, IEI.getOperand(0), *VS);
  Value *NewElt = IEI.getOperand(1);
  Value *InsIdx = IEI.getOperand(2);

  ValueVector Res;
  Res.resize(VS->NumFragments);

  if (auto *CI = dyn_cast<ConstantInt>(InsIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      if (I == Fragment) {
        bool IsPacked = VS->NumPacked > 1;
        if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
            !VS->RemainderTy->isVectorTy())
          IsPacked = false;
        if (IsPacked) {
          Res[I] =
              Builder.CreateInsertElement(Op0[I], NewElt, Idx % VS->NumPacked);
        } else {
          Res[I] = NewElt;
        }
      } else {
        Res[I] = Op0[I];
      }
    }
  } else {
    // Never split a variable insertelement that isn't fully scalarized.
    if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
      return false;

    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *ShouldReplace =
          Builder.CreateICmpEQ(InsIdx, ConstantInt::get(InsIdx->getType(), I),
                               InsIdx->getName() + ".is." + Twine(I));
      Value *OldElt = Op0[I];
      Res[I] = Builder.CreateSelect(ShouldReplace, NewElt, OldElt,
                                    IEI.getName() + ".i" + Twine(I));
    }
  }

  gather(&IEI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitExtractValueInst(ExtractValueInst &EVI) {
  Value *Op = EVI.getOperand(0);
  Type *OpTy = Op->getType();
  ValueVector Res;
  if (!isStructOfMatchingFixedVectors(OpTy))
    return false;
  if (CallInst *CI = dyn_cast<CallInst>(Op)) {
    Function *F = CI->getCalledFunction();
    if (!F)
      return false;
    Intrinsic::ID ID = F->getIntrinsicID();
    if (ID == Intrinsic::not_intrinsic || !isTriviallyScalarizable(ID, TTI))
      return false;
    // Note: falling through here means Op is a CallInst to an intrinsic that
    // isTriviallyScalarizable considers scalarizable.
  } else
    return false;
  Type *VecType = cast<FixedVectorType>(OpTy->getContainedType(0));
  std::optional<VectorSplit> VS = getVectorSplit(VecType);
  if (!VS)
    return false;
  for (unsigned I = 1; I < OpTy->getNumContainedTypes(); I++) {
    std::optional<VectorSplit> CurrVS =
        getVectorSplit(cast<FixedVectorType>(OpTy->getContainedType(I)));
    // It is possible for NumPacked >= NumElems, in which case getVectorSplit
    // returns no VectorSplit and we bail out of handling this call. The
    // secondary bailout case is a NumPacked mismatch, which can happen when
    // ScalarizeMinBits is not set to the default; for certain
    // ScalarizeMinBits values, intrinsics like frexp are only scalarized
    // when the struct elements have the same bit width.
    if (!CurrVS || CurrVS->NumPacked != VS->NumPacked)
      return false;
  }
  IRBuilder<> Builder(&EVI);
  Scatterer Op0 = scatter(&EVI, Op, *VS);
  assert(!EVI.getIndices().empty() && "Make sure an index exists");
  // For our use case we only care about the top-level index.
  unsigned Index = EVI.getIndices()[0];
  for (unsigned OpIdx = 0; OpIdx < Op0.size(); ++OpIdx) {
    Value *ResElem = Builder.CreateExtractValue(
        Op0[OpIdx], Index, EVI.getName() + ".elem" + Twine(Index));
    Res.push_back(ResElem);
  }

  Type *ActualVecType = cast<FixedVectorType>(OpTy->getContainedType(Index));
  std::optional<VectorSplit> AVS = getVectorSplit(ActualVecType);
  gather(&EVI, Res, *AVS);
  return true;
}

bool ScalarizerVisitor::visitExtractElementInst(ExtractElementInst &EEI) {
  std::optional<VectorSplit> VS = getVectorSplit(EEI.getOperand(0)->getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&EEI);
  Scatterer Op0 = scatter(&EEI, EEI.getOperand(0), *VS);
  Value *ExtIdx = EEI.getOperand(1);

  if (auto *CI = dyn_cast<ConstantInt>(ExtIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    Value *Res = Op0[Fragment];
    bool IsPacked = VS->NumPacked > 1;
    if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
        !VS->RemainderTy->isVectorTy())
      IsPacked = false;
    if (IsPacked)
      Res = Builder.CreateExtractElement(Res, Idx % VS->NumPacked);
    replaceUses(&EEI, Res);
    return true;
  }

  // Never split a variable extractelement that isn't fully scalarized.
  if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
    return false;

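  // Fall back to a chain of compare-and-select: starting from poison, lane I
  // replaces the running result whenever ExtIdx == I, so the final select
  // yields the requested lane.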
  Value *Res = PoisonValue::get(VS->VecTy->getElementType());
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Value *ShouldExtract =
        Builder.CreateICmpEQ(ExtIdx, ConstantInt::get(ExtIdx->getType(), I),
                             ExtIdx->getName() + ".is." + Twine(I));
    Value *Elt = Op0[I];
    Res = Builder.CreateSelect(ShouldExtract, Elt, Res,
                               EEI.getName() + ".upto" + Twine(I));
  }
  replaceUses(&EEI, Res);
  return true;
}

bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  std::optional<VectorSplit> VS = getVectorSplit(SVI.getType());
  std::optional<VectorSplit> VSOp =
      getVectorSplit(SVI.getOperand(0)->getType());
  if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
    return false;

  Scatterer Op0 = scatter(&SVI, SVI.getOperand(0), *VSOp);
  Scatterer Op1 = scatter(&SVI, SVI.getOperand(1), *VSOp);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    int Selector = SVI.getMaskValue(I);
    if (Selector < 0)
      Res[I] = PoisonValue::get(VS->VecTy->getElementType());
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
    else
      Res[I] = Op1[Selector - Op0.size()];
  }
  gather(&SVI, Res, *VS);
  return true;
}

bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
  std::optional<VectorSplit> VS = getVectorSplit(PHI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&PHI);
  ValueVector Res;
  Res.resize(VS->NumFragments);

  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Res[I] = Builder.CreatePHI(VS->getFragmentType(I), NumOps,
                               PHI.getName() + ".i" + Twine(I));
  }

  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I), *VS);
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
    for (unsigned J = 0; J < VS->NumFragments; ++J)
      cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
  }
  gather(&PHI, Res, *VS);
  return true;
}

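// For example (a sketch assuming full scalarization), a simple
//   %v = load <4 x i32>, ptr %p, align 16
// becomes four i32 loads through per-lane GEPs, whose alignments 16, 4, 8,
// and 4 follow from commonAlignment(VecAlign, Frag * SplitSize).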
bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!LI.isSimple())
    return false;

  std::optional<VectorLayout> Layout =
      getVectorLayout(LI.getType(), LI.getAlign(), LI.getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&LI);
  Scatterer Ptr = scatter(&LI, LI.getPointerOperand(), Layout->VS);
  ValueVector Res;
  Res.resize(Layout->VS.NumFragments);

  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Res[I] = Builder.CreateAlignedLoad(Layout->VS.getFragmentType(I), Ptr[I],
                                       Align(Layout->getFragmentAlign(I)),
                                       LI.getName() + ".i" + Twine(I));
  }
  gather(&LI, Res, Layout->VS);
  return true;
}

bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  if (!SI.isSimple())
    return false;

  Value *FullValue = SI.getValueOperand();
  std::optional<VectorLayout> Layout = getVectorLayout(
      FullValue->getType(), SI.getAlign(), SI.getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&SI);
  Scatterer VPtr = scatter(&SI, SI.getPointerOperand(), Layout->VS);
  Scatterer VVal = scatter(&SI, FullValue, Layout->VS);

  ValueVector Stores;
  Stores.resize(Layout->VS.NumFragments);
  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Value *Val = VVal[I];
    Value *Ptr = VPtr[I];
    Stores[I] =
        Builder.CreateAlignedStore(Val, Ptr, Layout->getFragmentAlign(I));
  }
  transferMetadataAndIRFlags(&SI, Stores);
  return true;
}

bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}

bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) {
  return splitUnary(FI, [](IRBuilder<> &Builder, Value *Op, const Twine &Name) {
    return Builder.CreateFreeze(Op, Name);
  });
}

// Delete the instructions that we scalarized. If a full vector result
// is still needed, recreate it using insertelements and/or shufflevectors.
bool ScalarizerVisitor::finish() {
  // The presence of data in Gathered or Scattered indicates changes
  // made to the Function.
  if (Gathered.empty() && Scattered.empty() && !Scalarized)
    return false;
  for (const auto &GMI : Gathered) {
    Instruction *Op = GMI.first;
    ValueVector &CV = *GMI.second;
    if (!Op->use_empty()) {
      // The value is still needed, so recreate it using a series of
      // insertelements and/or shufflevectors.
      Value *Res;
      if (auto *Ty = dyn_cast<FixedVectorType>(Op->getType())) {
        BasicBlock *BB = Op->getParent();
        IRBuilder<> Builder(Op);
        if (isa<PHINode>(Op))
          Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());

        VectorSplit VS = *getVectorSplit(Ty);
        assert(VS.NumFragments == CV.size());

        Res = concatenate(Builder, CV, VS, Op->getName());

        Res->takeName(Op);
      } else if (auto *Ty = dyn_cast<StructType>(Op->getType())) {
        BasicBlock *BB = Op->getParent();
        IRBuilder<> Builder(Op);
        if (isa<PHINode>(Op))
          Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());

        // Iterate over each element in the struct.
        unsigned NumOfStructElements = Ty->getNumElements();
        SmallVector<ValueVector, 4> ElemCV(NumOfStructElements);
        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          for (auto *CVelem : CV) {
            Value *Elem = Builder.CreateExtractValue(
                CVelem, I, Op->getName() + ".elem" + Twine(I));
            ElemCV[I].push_back(Elem);
          }
        }
        Res = PoisonValue::get(Ty);
        for (unsigned I = 0; I < NumOfStructElements; ++I) {
          Type *ElemTy = Ty->getElementType(I);
          assert(isa<FixedVectorType>(ElemTy) &&
                 "Only Structs of all FixedVectorType supported");
          VectorSplit VS = *getVectorSplit(ElemTy);
          assert(VS.NumFragments == CV.size());

          Value *ConcatenatedVector =
              concatenate(Builder, ElemCV[I], VS, Op->getName());
          Res = Builder.CreateInsertValue(Res, ConcatenatedVector, I,
                                          Op->getName() + ".insert");
        }
      } else {
        assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
        Res = CV[0];
        if (Op == Res)
          continue;
      }
      Op->replaceAllUsesWith(Res);
    }
    PotentiallyDeadInstrs.emplace_back(Op);
  }
  Gathered.clear();
  Scattered.clear();
  Scalarized = false;

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(PotentiallyDeadInstrs);

  return true;
}

PreservedAnalyses ScalarizerPass::run(Function &F,
                                      FunctionAnalysisManager &AM) {
  DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(F);
  const TargetTransformInfo *TTI = &AM.getResult<TargetIRAnalysis>(F);
  ScalarizerVisitor Impl(DT, TTI, Options);
  bool Changed = Impl.visit(F);
  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return Changed ? PA : PreservedAnalyses::all();
}