1 | //===- Scalarizer.cpp - Scalarize vector operations -----------------------===// |
2 | // |
3 | // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
4 | // See https://llvm.org/LICENSE.txt for license information. |
5 | // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
6 | // |
7 | //===----------------------------------------------------------------------===// |
8 | // |
9 | // This pass converts vector operations into scalar operations (or, optionally, |
10 | // operations on smaller vector widths), in order to expose optimization |
11 | // opportunities on the individual scalar operations. |
12 | // It is mainly intended for targets that do not have vector units, but it |
13 | // may also be useful for revectorizing code to different vector widths. |
14 | // |
15 | //===----------------------------------------------------------------------===// |
16 | |
17 | #include "llvm/Transforms/Scalar/Scalarizer.h" |
18 | #include "llvm/ADT/PostOrderIterator.h" |
19 | #include "llvm/ADT/SmallVector.h" |
20 | #include "llvm/ADT/Twine.h" |
21 | #include "llvm/Analysis/VectorUtils.h" |
22 | #include "llvm/IR/Argument.h" |
23 | #include "llvm/IR/BasicBlock.h" |
24 | #include "llvm/IR/Constants.h" |
25 | #include "llvm/IR/DataLayout.h" |
26 | #include "llvm/IR/DerivedTypes.h" |
27 | #include "llvm/IR/Dominators.h" |
28 | #include "llvm/IR/Function.h" |
29 | #include "llvm/IR/IRBuilder.h" |
30 | #include "llvm/IR/InstVisitor.h" |
31 | #include "llvm/IR/InstrTypes.h" |
32 | #include "llvm/IR/Instruction.h" |
33 | #include "llvm/IR/Instructions.h" |
34 | #include "llvm/IR/Intrinsics.h" |
35 | #include "llvm/IR/LLVMContext.h" |
36 | #include "llvm/IR/Module.h" |
37 | #include "llvm/IR/Type.h" |
38 | #include "llvm/IR/Value.h" |
39 | #include "llvm/Support/Casting.h" |
40 | #include "llvm/Support/CommandLine.h" |
41 | #include "llvm/Transforms/Utils/Local.h" |
42 | #include <cassert> |
43 | #include <cstdint> |
44 | #include <iterator> |
45 | #include <map> |
46 | #include <utility> |
47 | |
48 | using namespace llvm; |
49 | |
50 | #define DEBUG_TYPE "scalarizer" |
51 | |
52 | static cl::opt<bool> ( |
53 | "scalarize-variable-insert-extract" , cl::init(Val: true), cl::Hidden, |
54 | cl::desc("Allow the scalarizer pass to scalarize " |
55 | "insertelement/extractelement with variable index" )); |
56 | |
// Command-line override for ScalarizerPassOptions::ScalarizeLoadStore.
// This is disabled by default because having separate loads and stores
// makes it more likely that the -combiner-alias-analysis limits will be
// reached.
static cl::opt<bool> ClScalarizeLoadStore(
    "scalarize-load-store" , cl::init(Val: false), cl::Hidden,
    cl::desc("Allow the scalarizer pass to scalarize loads and store" ));

// Command-line override for ScalarizerPassOptions::ScalarizeMinBits.
// Split vectors larger than this size into fragments, where each fragment is
// either a vector no larger than this size or a scalar.
//
// Instructions with operands or results of different sizes that would be split
// into a different number of fragments are currently left as-is.
static cl::opt<unsigned> ClScalarizeMinBits(
    "scalarize-min-bits" , cl::init(Val: 0), cl::Hidden,
    cl::desc("Instruct the scalarizer pass to attempt to keep values of a "
             "minimum number of bits" ));
73 | |
74 | namespace { |
75 | |
76 | BasicBlock::iterator skipPastPhiNodesAndDbg(BasicBlock::iterator Itr) { |
77 | BasicBlock *BB = Itr->getParent(); |
78 | if (isa<PHINode>(Val: Itr)) |
79 | Itr = BB->getFirstInsertionPt(); |
80 | if (Itr != BB->end()) |
81 | Itr = skipDebugIntrinsics(It: Itr); |
82 | return Itr; |
83 | } |
84 | |
// Used to store the scattered form of a vector: one Value per fragment
// (scalar or sub-vector).
using ValueVector = SmallVector<Value *, 8>;

// Used to map a vector Value and associated type to its scattered form.
// The associated type is only non-null for pointer values that are "scattered"
// when used as pointer operands to load or store.
//
// We use std::map because we want iterators to persist across insertion and
// because the values are relatively large.
using ScatterMap = std::map<std::pair<Value *, Type *>, ValueVector>;

// Lists Instructions that have been replaced with scalar implementations,
// along with a pointer to their scattered forms.
using GatherList = SmallVector<std::pair<Instruction *, ValueVector *>, 16>;
99 | |
// Describes how a fixed vector type is decomposed into fragments.
struct VectorSplit {
  // The type of the vector.
  FixedVectorType *VecTy = nullptr;

  // The number of elements packed in a fragment (other than the remainder).
  unsigned NumPacked = 0;

  // The number of fragments (scalars or smaller vectors) into which the vector
  // shall be split.
  unsigned NumFragments = 0;

  // The type of each complete fragment.
  Type *SplitTy = nullptr;

  // The type of the remainder (last) fragment; null if all fragments are
  // complete.
  Type *RemainderTy = nullptr;

  // Return the type of fragment I: the remainder type for the final fragment
  // when one exists, otherwise the common fragment type.
  Type *getFragmentType(unsigned I) const {
    return RemainderTy && I == NumFragments - 1 ? RemainderTy : SplitTy;
  }
};
122 | |
123 | // Provides a very limited vector-like interface for lazily accessing one |
124 | // component of a scattered vector or vector pointer. |
// Provides a very limited vector-like interface for lazily accessing one
// component of a scattered vector or vector pointer.
class Scatterer {
public:
  Scatterer() = default;

  // Scatter V into Size components. If new instructions are needed,
  // insert them before BBI in BB. If Cache is nonnull, use it to cache
  // the results.
  Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
            const VectorSplit &VS, ValueVector *cachePtr = nullptr);

  // Return component I, creating a new Value for it if necessary.
  Value *operator[](unsigned I);

  // Return the number of components.
  unsigned size() const { return VS.NumFragments; }

private:
  BasicBlock *BB;              // Block in which new instructions are inserted.
  BasicBlock::iterator BBI;    // Insertion point within BB.
  Value *V;                    // The vector (or pointer) being scattered.
  VectorSplit VS;              // How V's type is split into fragments.
  bool IsPointer;              // True if V is a pointer rather than a vector.
  ValueVector *CachePtr;       // Shared fragment cache, or null.
  ValueVector Tmp;             // Local fragment storage when CachePtr is null.
};
150 | |
151 | // FCmpSplitter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp |
152 | // called Name that compares X and Y in the same way as FCI. |
153 | struct FCmpSplitter { |
154 | FCmpSplitter(FCmpInst &fci) : FCI(fci) {} |
155 | |
156 | Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, |
157 | const Twine &Name) const { |
158 | return Builder.CreateFCmp(P: FCI.getPredicate(), LHS: Op0, RHS: Op1, Name); |
159 | } |
160 | |
161 | FCmpInst &FCI; |
162 | }; |
163 | |
164 | // ICmpSplitter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp |
165 | // called Name that compares X and Y in the same way as ICI. |
166 | struct ICmpSplitter { |
167 | ICmpSplitter(ICmpInst &ici) : ICI(ici) {} |
168 | |
169 | Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, |
170 | const Twine &Name) const { |
171 | return Builder.CreateICmp(P: ICI.getPredicate(), LHS: Op0, RHS: Op1, Name); |
172 | } |
173 | |
174 | ICmpInst &ICI; |
175 | }; |
176 | |
177 | // UnarySplitter(UO)(Builder, X, Name) uses Builder to create |
178 | // a unary operator like UO called Name with operand X. |
179 | struct UnarySplitter { |
180 | UnarySplitter(UnaryOperator &uo) : UO(uo) {} |
181 | |
182 | Value *operator()(IRBuilder<> &Builder, Value *Op, const Twine &Name) const { |
183 | return Builder.CreateUnOp(Opc: UO.getOpcode(), V: Op, Name); |
184 | } |
185 | |
186 | UnaryOperator &UO; |
187 | }; |
188 | |
189 | // BinarySplitter(BO)(Builder, X, Y, Name) uses Builder to create |
190 | // a binary operator like BO called Name with operands X and Y. |
191 | struct BinarySplitter { |
192 | BinarySplitter(BinaryOperator &bo) : BO(bo) {} |
193 | |
194 | Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1, |
195 | const Twine &Name) const { |
196 | return Builder.CreateBinOp(Opc: BO.getOpcode(), LHS: Op0, RHS: Op1, Name); |
197 | } |
198 | |
199 | BinaryOperator &BO; |
200 | }; |
201 | |
202 | // Information about a load or store that we're scalarizing. |
203 | struct VectorLayout { |
204 | VectorLayout() = default; |
205 | |
206 | // Return the alignment of fragment Frag. |
207 | Align getFragmentAlign(unsigned Frag) { |
208 | return commonAlignment(A: VecAlign, Offset: Frag * SplitSize); |
209 | } |
210 | |
211 | // The split of the underlying vector type. |
212 | VectorSplit VS; |
213 | |
214 | // The alignment of the vector. |
215 | Align VecAlign; |
216 | |
217 | // The size of each (non-remainder) fragment in bytes. |
218 | uint64_t SplitSize = 0; |
219 | }; |
220 | |
/// Concatenate the given fragments to a single vector value of the type
/// described in @p VS.
///
/// Single-element fragments are re-inserted with insertelement; multi-element
/// fragments are first widened to the full vector width and then merged into
/// the accumulated result with a shufflevector.
static Value *concatenate(IRBuilder<> &Builder, ArrayRef<Value *> Fragments,
                          const VectorSplit &VS, Twine Name) {
  unsigned NumElements = VS.VecTy->getNumElements();
  SmallVector<int> ExtendMask;
  SmallVector<int> InsertMask;

  if (VS.NumPacked > 1) {
    // Prepare the shufflevector masks once and re-use them for all
    // fragments.
    // ExtendMask widens a fragment to NumElements lanes: the fragment's own
    // lanes first, the rest poison (-1).
    ExtendMask.resize(N: NumElements, NV: -1);
    for (unsigned I = 0; I < VS.NumPacked; ++I)
      ExtendMask[I] = I;

    // InsertMask starts as the identity; the lanes covered by the current
    // fragment are temporarily redirected to the second shuffle operand.
    InsertMask.resize(N: NumElements);
    for (unsigned I = 0; I < NumElements; ++I)
      InsertMask[I] = I;
  }

  Value *Res = PoisonValue::get(T: VS.VecTy);
  for (unsigned I = 0; I < VS.NumFragments; ++I) {
    Value *Fragment = Fragments[I];

    // The last fragment may be narrower than the others.
    unsigned NumPacked = VS.NumPacked;
    if (I == VS.NumFragments - 1 && VS.RemainderTy) {
      if (auto *RemVecTy = dyn_cast<FixedVectorType>(Val: VS.RemainderTy))
        NumPacked = RemVecTy->getNumElements();
      else
        NumPacked = 1;
    }

    if (NumPacked == 1) {
      // Scalar fragment: a plain insertelement at the fragment's base lane.
      Res = Builder.CreateInsertElement(Vec: Res, NewElt: Fragment, Idx: I * VS.NumPacked,
                                        Name: Name + ".upto" + Twine(I));
    } else {
      // Widen the fragment to full width, then blend it into Res.
      Fragment = Builder.CreateShuffleVector(V1: Fragment, V2: Fragment, Mask: ExtendMask);
      if (I == 0) {
        Res = Fragment;
      } else {
        // Point the fragment's lanes at the widened fragment (operand 2)...
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = NumElements + J;
        Res = Builder.CreateShuffleVector(V1: Res, V2: Fragment, Mask: InsertMask,
                                          Name: Name + ".upto" + Twine(I));
        // ...and restore the identity mapping for the next iteration.
        for (unsigned J = 0; J < NumPacked; ++J)
          InsertMask[I * VS.NumPacked + J] = I * VS.NumPacked + J;
      }
    }
  }

  return Res;
}
273 | |
274 | template <typename T> |
275 | T getWithDefaultOverride(const cl::opt<T> &ClOption, |
276 | const std::optional<T> &DefaultOverride) { |
277 | return ClOption.getNumOccurrences() ? ClOption |
278 | : DefaultOverride.value_or(ClOption); |
279 | } |
280 | |
281 | class ScalarizerVisitor : public InstVisitor<ScalarizerVisitor, bool> { |
282 | public: |
283 | ScalarizerVisitor(DominatorTree *DT, ScalarizerPassOptions Options) |
284 | : DT(DT), ScalarizeVariableInsertExtract(getWithDefaultOverride( |
285 | ClOption: ClScalarizeVariableInsertExtract, |
286 | DefaultOverride: Options.ScalarizeVariableInsertExtract)), |
287 | ScalarizeLoadStore(getWithDefaultOverride(ClOption: ClScalarizeLoadStore, |
288 | DefaultOverride: Options.ScalarizeLoadStore)), |
289 | ScalarizeMinBits(getWithDefaultOverride(ClOption: ClScalarizeMinBits, |
290 | DefaultOverride: Options.ScalarizeMinBits)) {} |
291 | |
292 | bool visit(Function &F); |
293 | |
294 | // InstVisitor methods. They return true if the instruction was scalarized, |
295 | // false if nothing changed. |
296 | bool visitInstruction(Instruction &I) { return false; } |
297 | bool visitSelectInst(SelectInst &SI); |
298 | bool visitICmpInst(ICmpInst &ICI); |
299 | bool visitFCmpInst(FCmpInst &FCI); |
300 | bool visitUnaryOperator(UnaryOperator &UO); |
301 | bool visitBinaryOperator(BinaryOperator &BO); |
302 | bool visitGetElementPtrInst(GetElementPtrInst &GEPI); |
303 | bool visitCastInst(CastInst &CI); |
304 | bool visitBitCastInst(BitCastInst &BCI); |
305 | bool visitInsertElementInst(InsertElementInst &IEI); |
306 | bool visitExtractElementInst(ExtractElementInst &EEI); |
307 | bool visitShuffleVectorInst(ShuffleVectorInst &SVI); |
308 | bool visitPHINode(PHINode &PHI); |
309 | bool visitLoadInst(LoadInst &LI); |
310 | bool visitStoreInst(StoreInst &SI); |
311 | bool visitCallInst(CallInst &ICI); |
312 | bool visitFreezeInst(FreezeInst &FI); |
313 | |
314 | private: |
315 | Scatterer scatter(Instruction *Point, Value *V, const VectorSplit &VS); |
316 | void gather(Instruction *Op, const ValueVector &CV, const VectorSplit &VS); |
317 | void replaceUses(Instruction *Op, Value *CV); |
318 | bool canTransferMetadata(unsigned Kind); |
319 | void transferMetadataAndIRFlags(Instruction *Op, const ValueVector &CV); |
320 | std::optional<VectorSplit> getVectorSplit(Type *Ty); |
321 | std::optional<VectorLayout> getVectorLayout(Type *Ty, Align Alignment, |
322 | const DataLayout &DL); |
323 | bool finish(); |
324 | |
325 | template<typename T> bool splitUnary(Instruction &, const T &); |
326 | template<typename T> bool splitBinary(Instruction &, const T &); |
327 | |
328 | bool splitCall(CallInst &CI); |
329 | |
330 | ScatterMap Scattered; |
331 | GatherList Gathered; |
332 | bool Scalarized; |
333 | |
334 | SmallVector<WeakTrackingVH, 32> PotentiallyDeadInstrs; |
335 | |
336 | DominatorTree *DT; |
337 | |
338 | const bool ; |
339 | const bool ScalarizeLoadStore; |
340 | const unsigned ScalarizeMinBits; |
341 | }; |
342 | |
343 | } // end anonymous namespace |
344 | |
345 | Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v, |
346 | const VectorSplit &VS, ValueVector *cachePtr) |
347 | : BB(bb), BBI(bbi), V(v), VS(VS), CachePtr(cachePtr) { |
348 | IsPointer = V->getType()->isPointerTy(); |
349 | if (!CachePtr) { |
350 | Tmp.resize(N: VS.NumFragments, NV: nullptr); |
351 | } else { |
352 | assert((CachePtr->empty() || VS.NumFragments == CachePtr->size() || |
353 | IsPointer) && |
354 | "Inconsistent vector sizes" ); |
355 | if (VS.NumFragments > CachePtr->size()) |
356 | CachePtr->resize(N: VS.NumFragments, NV: nullptr); |
357 | } |
358 | } |
359 | |
360 | // Return fragment Frag, creating a new Value for it if necessary. |
361 | Value *Scatterer::operator[](unsigned Frag) { |
362 | ValueVector &CV = CachePtr ? *CachePtr : Tmp; |
363 | // Try to reuse a previous value. |
364 | if (CV[Frag]) |
365 | return CV[Frag]; |
366 | IRBuilder<> Builder(BB, BBI); |
367 | if (IsPointer) { |
368 | if (Frag == 0) |
369 | CV[Frag] = V; |
370 | else |
371 | CV[Frag] = Builder.CreateConstGEP1_32(Ty: VS.SplitTy, Ptr: V, Idx0: Frag, |
372 | Name: V->getName() + ".i" + Twine(Frag)); |
373 | return CV[Frag]; |
374 | } |
375 | |
376 | Type *FragmentTy = VS.getFragmentType(I: Frag); |
377 | |
378 | if (auto *VecTy = dyn_cast<FixedVectorType>(Val: FragmentTy)) { |
379 | SmallVector<int> Mask; |
380 | for (unsigned J = 0; J < VecTy->getNumElements(); ++J) |
381 | Mask.push_back(Elt: Frag * VS.NumPacked + J); |
382 | CV[Frag] = |
383 | Builder.CreateShuffleVector(V1: V, V2: PoisonValue::get(T: V->getType()), Mask, |
384 | Name: V->getName() + ".i" + Twine(Frag)); |
385 | } else { |
386 | // Search through a chain of InsertElementInsts looking for element Frag. |
387 | // Record other elements in the cache. The new V is still suitable |
388 | // for all uncached indices. |
389 | while (true) { |
390 | InsertElementInst *Insert = dyn_cast<InsertElementInst>(Val: V); |
391 | if (!Insert) |
392 | break; |
393 | ConstantInt *Idx = dyn_cast<ConstantInt>(Val: Insert->getOperand(i_nocapture: 2)); |
394 | if (!Idx) |
395 | break; |
396 | unsigned J = Idx->getZExtValue(); |
397 | V = Insert->getOperand(i_nocapture: 0); |
398 | if (Frag * VS.NumPacked == J) { |
399 | CV[Frag] = Insert->getOperand(i_nocapture: 1); |
400 | return CV[Frag]; |
401 | } |
402 | |
403 | if (VS.NumPacked == 1 && !CV[J]) { |
404 | // Only cache the first entry we find for each index we're not actively |
405 | // searching for. This prevents us from going too far up the chain and |
406 | // caching incorrect entries. |
407 | CV[J] = Insert->getOperand(i_nocapture: 1); |
408 | } |
409 | } |
410 | CV[Frag] = Builder.CreateExtractElement(Vec: V, Idx: Frag * VS.NumPacked, |
411 | Name: V->getName() + ".i" + Twine(Frag)); |
412 | } |
413 | |
414 | return CV[Frag]; |
415 | } |
416 | |
417 | bool ScalarizerVisitor::visit(Function &F) { |
418 | assert(Gathered.empty() && Scattered.empty()); |
419 | |
420 | Scalarized = false; |
421 | |
422 | // To ensure we replace gathered components correctly we need to do an ordered |
423 | // traversal of the basic blocks in the function. |
424 | ReversePostOrderTraversal<BasicBlock *> RPOT(&F.getEntryBlock()); |
425 | for (BasicBlock *BB : RPOT) { |
426 | for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE;) { |
427 | Instruction *I = &*II; |
428 | bool Done = InstVisitor::visit(I); |
429 | ++II; |
430 | if (Done && I->getType()->isVoidTy()) |
431 | I->eraseFromParent(); |
432 | } |
433 | } |
434 | return finish(); |
435 | } |
436 | |
437 | // Return a scattered form of V that can be accessed by Point. V must be a |
438 | // vector or a pointer to a vector. |
// Return a scattered form of V that can be accessed by Point. V must be a
// vector or a pointer to a vector. The insertion point for any new
// instructions depends on where V is defined.
Scatterer ScalarizerVisitor::scatter(Instruction *Point, Value *V,
                                     const VectorSplit &VS) {
  if (Argument *VArg = dyn_cast<Argument>(Val: V)) {
    // Put the scattered form of arguments in the entry block,
    // so that it can be used everywhere.
    Function *F = VArg->getParent();
    BasicBlock *BB = &F->getEntryBlock();
    return Scatterer(BB, BB->begin(), V, VS, &Scattered[{V, VS.SplitTy}]);
  }
  if (Instruction *VOp = dyn_cast<Instruction>(Val: V)) {
    // When scalarizing PHI nodes we might try to examine/rewrite InsertElement
    // nodes in predecessors. If those predecessors are unreachable from entry,
    // then the IR in those blocks could have unexpected properties resulting in
    // infinite loops in Scatterer::operator[]. By simply treating values
    // originating from instructions in unreachable blocks as undef we do not
    // need to analyse them further.
    if (!DT->isReachableFromEntry(A: VOp->getParent()))
      return Scatterer(Point->getParent(), Point->getIterator(),
                       PoisonValue::get(T: V->getType()), VS);
    // Put the scattered form of an instruction directly after the
    // instruction, skipping over PHI nodes and debug intrinsics.
    BasicBlock *BB = VOp->getParent();
    return Scatterer(
        BB, skipPastPhiNodesAndDbg(Itr: std::next(x: BasicBlock::iterator(VOp))), V, VS,
        &Scattered[{V, VS.SplitTy}]);
  }
  // In the fallback case (e.g. constants), just put the scattered form before
  // Point and keep the result local to Point (no shared cache entry).
  return Scatterer(Point->getParent(), Point->getIterator(), V, VS);
}
469 | |
470 | // Replace Op with the gathered form of the components in CV. Defer the |
471 | // deletion of Op and creation of the gathered form to the end of the pass, |
472 | // so that we can avoid creating the gathered form if all uses of Op are |
473 | // replaced with uses of CV. |
// Replace Op with the gathered form of the components in CV. Defer the
// deletion of Op and creation of the gathered form to the end of the pass,
// so that we can avoid creating the gathered form if all uses of Op are
// replaced with uses of CV.
void ScalarizerVisitor::gather(Instruction *Op, const ValueVector &CV,
                               const VectorSplit &VS) {
  transferMetadataAndIRFlags(Op, CV);

  // If we already have a scattered form of Op (created from ExtractElements
  // of Op itself), replace them with the new form.
  ValueVector &SV = Scattered[{Op, VS.SplitTy}];
  if (!SV.empty()) {
    for (unsigned I = 0, E = SV.size(); I != E; ++I) {
      Value *V = SV[I];
      // Skip slots that were never materialized or already match.
      if (V == nullptr || SV[I] == CV[I])
        continue;

      // Redirect uses of the old fragment to the newly computed one, and
      // queue the old one for cleanup in finish().
      Instruction *Old = cast<Instruction>(Val: V);
      if (isa<Instruction>(Val: CV[I]))
        CV[I]->takeName(V: Old);
      Old->replaceAllUsesWith(V: CV[I]);
      PotentiallyDeadInstrs.emplace_back(Args&: Old);
    }
  }
  // Record the new scattered form and remember Op for deferred gathering.
  SV = CV;
  Gathered.push_back(Elt: GatherList::value_type(Op, &SV));
}
497 | |
498 | // Replace Op with CV and collect Op has a potentially dead instruction. |
499 | void ScalarizerVisitor::replaceUses(Instruction *Op, Value *CV) { |
500 | if (CV != Op) { |
501 | Op->replaceAllUsesWith(V: CV); |
502 | PotentiallyDeadInstrs.emplace_back(Args&: Op); |
503 | Scalarized = true; |
504 | } |
505 | } |
506 | |
507 | // Return true if it is safe to transfer the given metadata tag from |
508 | // vector to scalar instructions. |
509 | bool ScalarizerVisitor::canTransferMetadata(unsigned Tag) { |
510 | return (Tag == LLVMContext::MD_tbaa |
511 | || Tag == LLVMContext::MD_fpmath |
512 | || Tag == LLVMContext::MD_tbaa_struct |
513 | || Tag == LLVMContext::MD_invariant_load |
514 | || Tag == LLVMContext::MD_alias_scope |
515 | || Tag == LLVMContext::MD_noalias |
516 | || Tag == LLVMContext::MD_mem_parallel_loop_access |
517 | || Tag == LLVMContext::MD_access_group); |
518 | } |
519 | |
520 | // Transfer metadata from Op to the instructions in CV if it is known |
521 | // to be safe to do so. |
522 | void ScalarizerVisitor::transferMetadataAndIRFlags(Instruction *Op, |
523 | const ValueVector &CV) { |
524 | SmallVector<std::pair<unsigned, MDNode *>, 4> MDs; |
525 | Op->getAllMetadataOtherThanDebugLoc(MDs); |
526 | for (Value *V : CV) { |
527 | if (Instruction *New = dyn_cast<Instruction>(Val: V)) { |
528 | for (const auto &MD : MDs) |
529 | if (canTransferMetadata(Tag: MD.first)) |
530 | New->setMetadata(KindID: MD.first, Node: MD.second); |
531 | New->copyIRFlags(V: Op); |
532 | if (Op->getDebugLoc() && !New->getDebugLoc()) |
533 | New->setDebugLoc(Op->getDebugLoc()); |
534 | } |
535 | } |
536 | } |
537 | |
538 | // Determine how Ty is split, if at all. |
539 | std::optional<VectorSplit> ScalarizerVisitor::getVectorSplit(Type *Ty) { |
540 | VectorSplit Split; |
541 | Split.VecTy = dyn_cast<FixedVectorType>(Val: Ty); |
542 | if (!Split.VecTy) |
543 | return {}; |
544 | |
545 | unsigned NumElems = Split.VecTy->getNumElements(); |
546 | Type *ElemTy = Split.VecTy->getElementType(); |
547 | |
548 | if (NumElems == 1 || ElemTy->isPointerTy() || |
549 | 2 * ElemTy->getScalarSizeInBits() > ScalarizeMinBits) { |
550 | Split.NumPacked = 1; |
551 | Split.NumFragments = NumElems; |
552 | Split.SplitTy = ElemTy; |
553 | } else { |
554 | Split.NumPacked = ScalarizeMinBits / ElemTy->getScalarSizeInBits(); |
555 | if (Split.NumPacked >= NumElems) |
556 | return {}; |
557 | |
558 | Split.NumFragments = divideCeil(Numerator: NumElems, Denominator: Split.NumPacked); |
559 | Split.SplitTy = FixedVectorType::get(ElementType: ElemTy, NumElts: Split.NumPacked); |
560 | |
561 | unsigned RemainderElems = NumElems % Split.NumPacked; |
562 | if (RemainderElems > 1) |
563 | Split.RemainderTy = FixedVectorType::get(ElementType: ElemTy, NumElts: RemainderElems); |
564 | else if (RemainderElems == 1) |
565 | Split.RemainderTy = ElemTy; |
566 | } |
567 | |
568 | return Split; |
569 | } |
570 | |
571 | // Try to fill in Layout from Ty, returning true on success. Alignment is |
572 | // the alignment of the vector, or std::nullopt if the ABI default should be |
573 | // used. |
574 | std::optional<VectorLayout> |
575 | ScalarizerVisitor::getVectorLayout(Type *Ty, Align Alignment, |
576 | const DataLayout &DL) { |
577 | std::optional<VectorSplit> VS = getVectorSplit(Ty); |
578 | if (!VS) |
579 | return {}; |
580 | |
581 | VectorLayout Layout; |
582 | Layout.VS = *VS; |
583 | // Check that we're dealing with full-byte fragments. |
584 | if (!DL.typeSizeEqualsStoreSize(Ty: VS->SplitTy) || |
585 | (VS->RemainderTy && !DL.typeSizeEqualsStoreSize(Ty: VS->RemainderTy))) |
586 | return {}; |
587 | Layout.VecAlign = Alignment; |
588 | Layout.SplitSize = DL.getTypeStoreSize(Ty: VS->SplitTy); |
589 | return Layout; |
590 | } |
591 | |
592 | // Scalarize one-operand instruction I, using Split(Builder, X, Name) |
593 | // to create an instruction like I with operand X and name Name. |
594 | template<typename Splitter> |
595 | bool ScalarizerVisitor::splitUnary(Instruction &I, const Splitter &Split) { |
596 | std::optional<VectorSplit> VS = getVectorSplit(Ty: I.getType()); |
597 | if (!VS) |
598 | return false; |
599 | |
600 | std::optional<VectorSplit> OpVS; |
601 | if (I.getOperand(i: 0)->getType() == I.getType()) { |
602 | OpVS = VS; |
603 | } else { |
604 | OpVS = getVectorSplit(Ty: I.getOperand(i: 0)->getType()); |
605 | if (!OpVS || VS->NumPacked != OpVS->NumPacked) |
606 | return false; |
607 | } |
608 | |
609 | IRBuilder<> Builder(&I); |
610 | Scatterer Op = scatter(Point: &I, V: I.getOperand(i: 0), VS: *OpVS); |
611 | assert(Op.size() == VS->NumFragments && "Mismatched unary operation" ); |
612 | ValueVector Res; |
613 | Res.resize(N: VS->NumFragments); |
614 | for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) |
615 | Res[Frag] = Split(Builder, Op[Frag], I.getName() + ".i" + Twine(Frag)); |
616 | gather(Op: &I, CV: Res, VS: *VS); |
617 | return true; |
618 | } |
619 | |
620 | // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name) |
621 | // to create an instruction like I with operands X and Y and name Name. |
622 | template<typename Splitter> |
623 | bool ScalarizerVisitor::splitBinary(Instruction &I, const Splitter &Split) { |
624 | std::optional<VectorSplit> VS = getVectorSplit(Ty: I.getType()); |
625 | if (!VS) |
626 | return false; |
627 | |
628 | std::optional<VectorSplit> OpVS; |
629 | if (I.getOperand(i: 0)->getType() == I.getType()) { |
630 | OpVS = VS; |
631 | } else { |
632 | OpVS = getVectorSplit(Ty: I.getOperand(i: 0)->getType()); |
633 | if (!OpVS || VS->NumPacked != OpVS->NumPacked) |
634 | return false; |
635 | } |
636 | |
637 | IRBuilder<> Builder(&I); |
638 | Scatterer VOp0 = scatter(Point: &I, V: I.getOperand(i: 0), VS: *OpVS); |
639 | Scatterer VOp1 = scatter(Point: &I, V: I.getOperand(i: 1), VS: *OpVS); |
640 | assert(VOp0.size() == VS->NumFragments && "Mismatched binary operation" ); |
641 | assert(VOp1.size() == VS->NumFragments && "Mismatched binary operation" ); |
642 | ValueVector Res; |
643 | Res.resize(N: VS->NumFragments); |
644 | for (unsigned Frag = 0; Frag < VS->NumFragments; ++Frag) { |
645 | Value *Op0 = VOp0[Frag]; |
646 | Value *Op1 = VOp1[Frag]; |
647 | Res[Frag] = Split(Builder, Op0, Op1, I.getName() + ".i" + Twine(Frag)); |
648 | } |
649 | gather(Op: &I, CV: Res, VS: *VS); |
650 | return true; |
651 | } |
652 | |
// Return true if intrinsic ID can be scalarized element-by-element.
// Currently this is exactly the set of trivially vectorizable intrinsics.
static bool isTriviallyScalariable(Intrinsic::ID ID) {
  return isTriviallyVectorizable(ID);
}
656 | |
/// If a call to a vector typed intrinsic function, split into a scalar call per
/// element if possible for the intrinsic.
bool ScalarizerVisitor::splitCall(CallInst &CI) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty: CI.getType());
  if (!VS)
    return false;

  Function *F = CI.getCalledFunction();
  if (!F)
    return false;

  Intrinsic::ID ID = F->getIntrinsicID();
  if (ID == Intrinsic::not_intrinsic || !isTriviallyScalariable(ID))
    return false;

  unsigned NumArgs = CI.arg_size();

  // For each argument: its scalar value (if it is not a vector), its
  // scattered form (if it is), and its position in the overload type list
  // (if the intrinsic is overloaded on that argument's type).
  ValueVector ScalarOperands(NumArgs);
  SmallVector<Scatterer, 8> Scattered(NumArgs);
  SmallVector<int> OverloadIdx(NumArgs, -1);

  SmallVector<llvm::Type *, 3> Tys;
  // Add return type if intrinsic is overloaded on it.
  if (isVectorIntrinsicWithOverloadTypeAtArg(ID, OpdIdx: -1))
    Tys.push_back(Elt: VS->SplitTy);

  // Assumes that any vector type has the same number of elements as the return
  // vector type, which is true for all current intrinsics.
  for (unsigned I = 0; I != NumArgs; ++I) {
    Value *OpI = CI.getOperand(i_nocapture: I);
    if ([[maybe_unused]] auto *OpVecTy =
            dyn_cast<FixedVectorType>(Val: OpI->getType())) {
      assert(OpVecTy->getNumElements() == VS->VecTy->getNumElements());
      std::optional<VectorSplit> OpVS = getVectorSplit(Ty: OpI->getType());
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // The natural split of the operand doesn't match the result. This could
        // happen if the vector elements are different and the ScalarizeMinBits
        // option is used.
        //
        // We could in principle handle this case as well, at the cost of
        // complicating the scattering machinery to support multiple scattering
        // granularities for a single value.
        return false;
      }

      Scattered[I] = scatter(Point: &CI, V: OpI, VS: *OpVS);
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, OpdIdx: I)) {
        OverloadIdx[I] = Tys.size();
        Tys.push_back(Elt: OpVS->SplitTy);
      }
    } else {
      // Scalar argument: passed through unchanged to every scalar call.
      ScalarOperands[I] = OpI;
      if (isVectorIntrinsicWithOverloadTypeAtArg(ID, OpdIdx: I))
        Tys.push_back(Elt: OpI->getType());
    }
  }

  ValueVector Res(VS->NumFragments);
  ValueVector ScalarCallOps(NumArgs);

  Function *NewIntrin = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys);
  IRBuilder<> Builder(&CI);

  // Perform actual scalarization, taking care to preserve any scalar operands.
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    // The last fragment may have a narrower type, requiring a re-declared
    // intrinsic with adjusted overload types.
    bool IsRemainder = I == VS->NumFragments - 1 && VS->RemainderTy;
    ScalarCallOps.clear();

    if (IsRemainder)
      Tys[0] = VS->RemainderTy;

    for (unsigned J = 0; J != NumArgs; ++J) {
      if (isVectorIntrinsicWithScalarOpAtArg(ID, ScalarOpdIdx: J)) {
        ScalarCallOps.push_back(Elt: ScalarOperands[J]);
      } else {
        ScalarCallOps.push_back(Elt: Scattered[J][I]);
        if (IsRemainder && OverloadIdx[J] >= 0)
          Tys[OverloadIdx[J]] = Scattered[J][I]->getType();
      }
    }

    if (IsRemainder)
      NewIntrin = Intrinsic::getDeclaration(M: F->getParent(), id: ID, Tys);

    Res[I] = Builder.CreateCall(Callee: NewIntrin, Args: ScalarCallOps,
                                Name: CI.getName() + ".i" + Twine(I));
  }

  gather(Op: &CI, CV: Res, VS: *VS);
  return true;
}
749 | |
// Scalarize a select. The condition may be a scalar (shared by all
// fragments) or a vector (scattered alongside the true/false operands).
bool ScalarizerVisitor::visitSelectInst(SelectInst &SI) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty: SI.getType());
  if (!VS)
    return false;

  std::optional<VectorSplit> CondVS;
  if (isa<FixedVectorType>(Val: SI.getCondition()->getType())) {
    CondVS = getVectorSplit(Ty: SI.getCondition()->getType());
    if (!CondVS || CondVS->NumPacked != VS->NumPacked) {
      // This happens when ScalarizeMinBits is used.
      return false;
    }
  }

  IRBuilder<> Builder(&SI);
  Scatterer VOp1 = scatter(Point: &SI, V: SI.getOperand(i_nocapture: 1), VS: *VS);
  Scatterer VOp2 = scatter(Point: &SI, V: SI.getOperand(i_nocapture: 2), VS: *VS);
  assert(VOp1.size() == VS->NumFragments && "Mismatched select" );
  assert(VOp2.size() == VS->NumFragments && "Mismatched select" );
  ValueVector Res;
  Res.resize(N: VS->NumFragments);

  if (CondVS) {
    // Vector condition: each fragment selects with its own condition piece.
    Scatterer VOp0 = scatter(Point: &SI, V: SI.getOperand(i_nocapture: 0), VS: *CondVS);
    assert(VOp0.size() == CondVS->NumFragments && "Mismatched select" );
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op0 = VOp0[I];
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(C: Op0, True: Op1, False: Op2,
                                    Name: SI.getName() + ".i" + Twine(I));
    }
  } else {
    // Scalar condition: the same condition guards every fragment.
    Value *Op0 = SI.getOperand(i_nocapture: 0);
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *Op1 = VOp1[I];
      Value *Op2 = VOp2[I];
      Res[I] = Builder.CreateSelect(C: Op0, True: Op1, False: Op2,
                                    Name: SI.getName() + ".i" + Twine(I));
    }
  }
  gather(Op: &SI, CV: Res, VS: *VS);
  return true;
}
794 | |
795 | bool ScalarizerVisitor::visitICmpInst(ICmpInst &ICI) { |
796 | return splitBinary(I&: ICI, Split: ICmpSplitter(ICI)); |
797 | } |
798 | |
799 | bool ScalarizerVisitor::visitFCmpInst(FCmpInst &FCI) { |
800 | return splitBinary(I&: FCI, Split: FCmpSplitter(FCI)); |
801 | } |
802 | |
803 | bool ScalarizerVisitor::visitUnaryOperator(UnaryOperator &UO) { |
804 | return splitUnary(I&: UO, Split: UnarySplitter(UO)); |
805 | } |
806 | |
807 | bool ScalarizerVisitor::visitBinaryOperator(BinaryOperator &BO) { |
808 | return splitBinary(I&: BO, Split: BinarySplitter(BO)); |
809 | } |
810 | |
// Scalarize a vector GEP. Each operand (base pointer and every index) may
// independently be a scalar (broadcast across all fragments) or a vector,
// which must split with the same packing as the result.
bool ScalarizerVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty: GEPI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&GEPI);
  unsigned NumIndices = GEPI.getNumIndices();

  // The base pointer and indices might be scalar even if it's a vector GEP.
  // For each operand slot, exactly one of ScalarOps[I] / ScatterOps[I] is set.
  SmallVector<Value *, 8> ScalarOps{1 + NumIndices};
  SmallVector<Scatterer, 8> ScatterOps{1 + NumIndices};

  for (unsigned I = 0; I < 1 + NumIndices; ++I) {
    if (auto *VecTy =
            dyn_cast<FixedVectorType>(Val: GEPI.getOperand(i_nocapture: I)->getType())) {
      // Vector operand: must fragment identically to the result.
      std::optional<VectorSplit> OpVS = getVectorSplit(Ty: VecTy);
      if (!OpVS || OpVS->NumPacked != VS->NumPacked) {
        // This can happen when ScalarizeMinBits is used.
        return false;
      }
      ScatterOps[I] = scatter(Point: &GEPI, V: GEPI.getOperand(i_nocapture: I), VS: *OpVS);
    } else {
      ScalarOps[I] = GEPI.getOperand(i_nocapture: I);
    }
  }

  ValueVector Res;
  Res.resize(N: VS->NumFragments);
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    // Assemble this fragment's operand list: scalars are reused as-is,
    // vectors contribute their I-th fragment.
    SmallVector<Value *, 8> SplitOps;
    SplitOps.resize(N: 1 + NumIndices);
    for (unsigned J = 0; J < 1 + NumIndices; ++J) {
      if (ScalarOps[J])
        SplitOps[J] = ScalarOps[J];
      else
        SplitOps[J] = ScatterOps[J][I];
    }
    Res[I] = Builder.CreateGEP(Ty: GEPI.getSourceElementType(), Ptr: SplitOps[0],
                               IdxList: ArrayRef(SplitOps).drop_front(),
                               Name: GEPI.getName() + ".i" + Twine(I));
    // Propagate the inbounds flag; CreateGEP may have constant-folded, so
    // only set it when we actually got a GEP instruction back.
    if (GEPI.isInBounds())
      if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Val: Res[I]))
        NewGEPI->setIsInBounds();
  }
  gather(Op: &GEPI, CV: Res, VS: *VS);
  return true;
}
858 | |
859 | bool ScalarizerVisitor::visitCastInst(CastInst &CI) { |
860 | std::optional<VectorSplit> DestVS = getVectorSplit(Ty: CI.getDestTy()); |
861 | if (!DestVS) |
862 | return false; |
863 | |
864 | std::optional<VectorSplit> SrcVS = getVectorSplit(Ty: CI.getSrcTy()); |
865 | if (!SrcVS || SrcVS->NumPacked != DestVS->NumPacked) |
866 | return false; |
867 | |
868 | IRBuilder<> Builder(&CI); |
869 | Scatterer Op0 = scatter(Point: &CI, V: CI.getOperand(i_nocapture: 0), VS: *SrcVS); |
870 | assert(Op0.size() == SrcVS->NumFragments && "Mismatched cast" ); |
871 | ValueVector Res; |
872 | Res.resize(N: DestVS->NumFragments); |
873 | for (unsigned I = 0; I < DestVS->NumFragments; ++I) |
874 | Res[I] = |
875 | Builder.CreateCast(Op: CI.getOpcode(), V: Op0[I], DestTy: DestVS->getFragmentType(I), |
876 | Name: CI.getName() + ".i" + Twine(I)); |
877 | gather(Op: &CI, CV: Res, VS: *DestVS); |
878 | return true; |
879 | } |
880 | |
// Scalarize a bitcast between vector types. Handles three fragment-size
// relations: equal-sized fragments map 1:1; a larger source fragment is
// re-scattered into several destination fragments; several source fragments
// are concatenated into one destination fragment. Splits with a remainder
// fragment are not handled.
bool ScalarizerVisitor::visitBitCastInst(BitCastInst &BCI) {
  std::optional<VectorSplit> DstVS = getVectorSplit(Ty: BCI.getDestTy());
  std::optional<VectorSplit> SrcVS = getVectorSplit(Ty: BCI.getSrcTy());
  if (!DstVS || !SrcVS || DstVS->RemainderTy || SrcVS->RemainderTy)
    return false;

  const bool isPointerTy = DstVS->VecTy->getElementType()->isPointerTy();

  // Vectors of pointers are always fully scalarized.
  assert(!isPointerTy || (DstVS->NumPacked == 1 && SrcVS->NumPacked == 1));

  IRBuilder<> Builder(&BCI);
  Scatterer Op0 = scatter(Point: &BCI, V: BCI.getOperand(i_nocapture: 0), VS: *SrcVS);
  ValueVector Res;
  Res.resize(N: DstVS->NumFragments);

  unsigned DstSplitBits = DstVS->SplitTy->getPrimitiveSizeInBits();
  unsigned SrcSplitBits = SrcVS->SplitTy->getPrimitiveSizeInBits();

  if (isPointerTy || DstSplitBits == SrcSplitBits) {
    // Same fragment size on both sides: bitcast fragment-for-fragment.
    assert(DstVS->NumFragments == SrcVS->NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      Res[I] = Builder.CreateBitCast(V: Op0[I], DestTy: DstVS->getFragmentType(I),
                                     Name: BCI.getName() + ".i" + Twine(I));
    }
  } else if (SrcSplitBits % DstSplitBits == 0) {
    // Convert each source fragment to the same-sized destination vector and
    // then scatter the result to the destination.
    VectorSplit MidVS;
    MidVS.NumPacked = DstVS->NumPacked;
    MidVS.NumFragments = SrcSplitBits / DstSplitBits;
    MidVS.VecTy = FixedVectorType::get(ElementType: DstVS->VecTy->getElementType(),
                                       NumElts: MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = DstVS->SplitTy;

    unsigned ResI = 0;
    for (unsigned I = 0; I < SrcVS->NumFragments; ++I) {
      Value *V = Op0[I];

      // Look through any existing bitcasts before converting to <N x t2>.
      // In the best case, the resulting conversion might be a no-op.
      Instruction *VI;
      while ((VI = dyn_cast<Instruction>(Val: V)) &&
             VI->getOpcode() == Instruction::BitCast)
        V = VI->getOperand(i: 0);

      V = Builder.CreateBitCast(V, DestTy: MidVS.VecTy, Name: V->getName() + ".cast" );

      // Break the intermediate vector into destination-sized fragments.
      Scatterer Mid = scatter(Point: &BCI, V, VS: MidVS);
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        Res[ResI++] = Mid[J];
    }
  } else if (DstSplitBits % SrcSplitBits == 0) {
    // Gather enough source fragments to make up a destination fragment and
    // then convert to the destination type.
    VectorSplit MidVS;
    MidVS.NumFragments = DstSplitBits / SrcSplitBits;
    MidVS.NumPacked = SrcVS->NumPacked;
    MidVS.VecTy = FixedVectorType::get(ElementType: SrcVS->VecTy->getElementType(),
                                       NumElts: MidVS.NumPacked * MidVS.NumFragments);
    MidVS.SplitTy = SrcVS->SplitTy;

    unsigned SrcI = 0;
    SmallVector<Value *, 8> ConcatOps;
    ConcatOps.resize(N: MidVS.NumFragments);
    for (unsigned I = 0; I < DstVS->NumFragments; ++I) {
      // Concatenate MidVS.NumFragments consecutive source fragments, then
      // bitcast the combined vector to the destination fragment type.
      for (unsigned J = 0; J < MidVS.NumFragments; ++J)
        ConcatOps[J] = Op0[SrcI++];
      Value *V = concatenate(Builder, Fragments: ConcatOps, VS: MidVS,
                             Name: BCI.getName() + ".i" + Twine(I));
      Res[I] = Builder.CreateBitCast(V, DestTy: DstVS->getFragmentType(I),
                                     Name: BCI.getName() + ".i" + Twine(I));
    }
  } else {
    // Fragment sizes are incommensurable; leave the bitcast alone.
    return false;
  }

  gather(Op: &BCI, CV: Res, VS: *DstVS);
  return true;
}
961 | |
// Scalarize an insertelement. A constant index touches exactly one fragment
// (all others pass through unchanged); a variable index is lowered to a
// per-fragment compare+select chain, but only when fully scalarized and the
// ScalarizeVariableInsertExtract option allows it.
bool ScalarizerVisitor::visitInsertElementInst(InsertElementInst &IEI) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty: IEI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&IEI);
  Scatterer Op0 = scatter(Point: &IEI, V: IEI.getOperand(i_nocapture: 0), VS: *VS);
  Value *NewElt = IEI.getOperand(i_nocapture: 1);
  Value *InsIdx = IEI.getOperand(i_nocapture: 2);

  ValueVector Res;
  Res.resize(N: VS->NumFragments);

  if (auto *CI = dyn_cast<ConstantInt>(Val: InsIdx)) {
    unsigned Idx = CI->getZExtValue();
    unsigned Fragment = Idx / VS->NumPacked;
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      if (I == Fragment) {
        bool IsPacked = VS->NumPacked > 1;
        // A scalar remainder fragment holds its element directly, so the
        // insert degenerates to replacing the fragment itself.
        if (Fragment == VS->NumFragments - 1 && VS->RemainderTy &&
            !VS->RemainderTy->isVectorTy())
          IsPacked = false;
        if (IsPacked) {
          Res[I] =
              Builder.CreateInsertElement(Vec: Op0[I], NewElt, Idx: Idx % VS->NumPacked);
        } else {
          Res[I] = NewElt;
        }
      } else {
        // Untouched fragments are forwarded as-is.
        Res[I] = Op0[I];
      }
    }
  } else {
    // Never split a variable insertelement that isn't fully scalarized.
    if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1)
      return false;

    // Variable index: each fragment keeps its old value unless the runtime
    // index matches its position.
    for (unsigned I = 0; I < VS->NumFragments; ++I) {
      Value *ShouldReplace =
          Builder.CreateICmpEQ(LHS: InsIdx, RHS: ConstantInt::get(Ty: InsIdx->getType(), V: I),
                               Name: InsIdx->getName() + ".is." + Twine(I));
      Value *OldElt = Op0[I];
      Res[I] = Builder.CreateSelect(C: ShouldReplace, True: NewElt, False: OldElt,
                                    Name: IEI.getName() + ".i" + Twine(I));
    }
  }

  gather(Op: &IEI, CV: Res, VS: *VS);
  return true;
}
1012 | |
1013 | bool ScalarizerVisitor::(ExtractElementInst &EEI) { |
1014 | std::optional<VectorSplit> VS = getVectorSplit(Ty: EEI.getOperand(i_nocapture: 0)->getType()); |
1015 | if (!VS) |
1016 | return false; |
1017 | |
1018 | IRBuilder<> Builder(&EEI); |
1019 | Scatterer Op0 = scatter(Point: &EEI, V: EEI.getOperand(i_nocapture: 0), VS: *VS); |
1020 | Value *ExtIdx = EEI.getOperand(i_nocapture: 1); |
1021 | |
1022 | if (auto *CI = dyn_cast<ConstantInt>(Val: ExtIdx)) { |
1023 | unsigned Idx = CI->getZExtValue(); |
1024 | unsigned Fragment = Idx / VS->NumPacked; |
1025 | Value *Res = Op0[Fragment]; |
1026 | bool IsPacked = VS->NumPacked > 1; |
1027 | if (Fragment == VS->NumFragments - 1 && VS->RemainderTy && |
1028 | !VS->RemainderTy->isVectorTy()) |
1029 | IsPacked = false; |
1030 | if (IsPacked) |
1031 | Res = Builder.CreateExtractElement(Vec: Res, Idx: Idx % VS->NumPacked); |
1032 | replaceUses(Op: &EEI, CV: Res); |
1033 | return true; |
1034 | } |
1035 | |
1036 | // Never split a variable extractelement that isn't fully scalarized. |
1037 | if (!ScalarizeVariableInsertExtract || VS->NumPacked > 1) |
1038 | return false; |
1039 | |
1040 | Value *Res = PoisonValue::get(T: VS->VecTy->getElementType()); |
1041 | for (unsigned I = 0; I < VS->NumFragments; ++I) { |
1042 | Value * = |
1043 | Builder.CreateICmpEQ(LHS: ExtIdx, RHS: ConstantInt::get(Ty: ExtIdx->getType(), V: I), |
1044 | Name: ExtIdx->getName() + ".is." + Twine(I)); |
1045 | Value *Elt = Op0[I]; |
1046 | Res = Builder.CreateSelect(C: ShouldExtract, True: Elt, False: Res, |
1047 | Name: EEI.getName() + ".upto" + Twine(I)); |
1048 | } |
1049 | replaceUses(Op: &EEI, CV: Res); |
1050 | return true; |
1051 | } |
1052 | |
// Scalarize a shufflevector. Only fully scalarized splits (one element per
// fragment) are handled, so each mask entry maps directly to one source
// fragment; negative (undef) mask entries become poison.
bool ScalarizerVisitor::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty: SVI.getType());
  std::optional<VectorSplit> VSOp =
      getVectorSplit(Ty: SVI.getOperand(i_nocapture: 0)->getType());
  if (!VS || !VSOp || VS->NumPacked > 1 || VSOp->NumPacked > 1)
    return false;

  // Both shuffle inputs have the same type, so they share VSOp.
  Scatterer Op0 = scatter(Point: &SVI, V: SVI.getOperand(i_nocapture: 0), VS: *VSOp);
  Scatterer Op1 = scatter(Point: &SVI, V: SVI.getOperand(i_nocapture: 1), VS: *VSOp);
  ValueVector Res;
  Res.resize(N: VS->NumFragments);

  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    int Selector = SVI.getMaskValue(Elt: I);
    if (Selector < 0)
      // Undef mask entry: result element is poison.
      Res[I] = PoisonValue::get(T: VS->VecTy->getElementType());
    else if (unsigned(Selector) < Op0.size())
      Res[I] = Op0[Selector];
    else
      // Selectors >= Op0.size() index into the second operand.
      Res[I] = Op1[Selector - Op0.size()];
  }
  gather(Op: &SVI, CV: Res, VS: *VS);
  return true;
}
1077 | |
// Scalarize a PHI node: create one scalar/fragment PHI per fragment, then
// wire each new PHI's incoming values from the scattered incoming vectors.
bool ScalarizerVisitor::visitPHINode(PHINode &PHI) {
  std::optional<VectorSplit> VS = getVectorSplit(Ty: PHI.getType());
  if (!VS)
    return false;

  IRBuilder<> Builder(&PHI);
  ValueVector Res;
  Res.resize(N: VS->NumFragments);

  // First create all the fragment PHIs (incoming values are added below,
  // after scattering each incoming vector).
  unsigned NumOps = PHI.getNumOperands();
  for (unsigned I = 0; I < VS->NumFragments; ++I) {
    Res[I] = Builder.CreatePHI(Ty: VS->getFragmentType(I), NumReservedValues: NumOps,
                               Name: PHI.getName() + ".i" + Twine(I));
  }

  // Then populate incoming values: fragment J of incoming vector I goes to
  // fragment PHI J, from the same predecessor block.
  for (unsigned I = 0; I < NumOps; ++I) {
    Scatterer Op = scatter(Point: &PHI, V: PHI.getIncomingValue(i: I), VS: *VS);
    BasicBlock *IncomingBlock = PHI.getIncomingBlock(i: I);
    for (unsigned J = 0; J < VS->NumFragments; ++J)
      cast<PHINode>(Val: Res[J])->addIncoming(V: Op[J], BB: IncomingBlock);
  }
  gather(Op: &PHI, CV: Res, VS: *VS);
  return true;
}
1102 | |
// Scalarize a simple (non-atomic, non-volatile) vector load into one aligned
// load per fragment, using the layout to compute each fragment's alignment.
bool ScalarizerVisitor::visitLoadInst(LoadInst &LI) {
  if (!ScalarizeLoadStore)
    return false;
  // Atomic/volatile loads must stay whole.
  if (!LI.isSimple())
    return false;

  std::optional<VectorLayout> Layout = getVectorLayout(
      Ty: LI.getType(), Alignment: LI.getAlign(), DL: LI.getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&LI);
  // Scatter the pointer operand to get one pointer per fragment.
  Scatterer Ptr = scatter(Point: &LI, V: LI.getPointerOperand(), VS: Layout->VS);
  ValueVector Res;
  Res.resize(N: Layout->VS.NumFragments);

  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Res[I] = Builder.CreateAlignedLoad(Ty: Layout->VS.getFragmentType(I), Ptr: Ptr[I],
                                       Align: Align(Layout->getFragmentAlign(Frag: I)),
                                       Name: LI.getName() + ".i" + Twine(I));
  }
  gather(Op: &LI, CV: Res, VS: Layout->VS);
  return true;
}
1127 | |
// Scalarize a simple (non-atomic, non-volatile) vector store into one aligned
// store per fragment; metadata and IR flags from the original store are
// transferred to the new stores.
bool ScalarizerVisitor::visitStoreInst(StoreInst &SI) {
  if (!ScalarizeLoadStore)
    return false;
  // Atomic/volatile stores must stay whole.
  if (!SI.isSimple())
    return false;

  Value *FullValue = SI.getValueOperand();
  std::optional<VectorLayout> Layout = getVectorLayout(
      Ty: FullValue->getType(), Alignment: SI.getAlign(), DL: SI.getDataLayout());
  if (!Layout)
    return false;

  IRBuilder<> Builder(&SI);
  // Scatter both the pointer and the stored value into matching fragments.
  Scatterer VPtr = scatter(Point: &SI, V: SI.getPointerOperand(), VS: Layout->VS);
  Scatterer VVal = scatter(Point: &SI, V: FullValue, VS: Layout->VS);

  ValueVector Stores;
  Stores.resize(N: Layout->VS.NumFragments);
  for (unsigned I = 0; I < Layout->VS.NumFragments; ++I) {
    Value *Val = VVal[I];
    Value *Ptr = VPtr[I];
    Stores[I] =
        Builder.CreateAlignedStore(Val, Ptr, Align: Layout->getFragmentAlign(Frag: I));
  }
  // Stores produce no value to gather; just propagate metadata/flags.
  transferMetadataAndIRFlags(Op: &SI, CV: Stores);
  return true;
}
1155 | |
// Scalarize a call instruction; all the work (intrinsic checks, per-fragment
// call creation) is delegated to splitCall.
bool ScalarizerVisitor::visitCallInst(CallInst &CI) {
  return splitCall(CI);
}
1159 | |
1160 | bool ScalarizerVisitor::visitFreezeInst(FreezeInst &FI) { |
1161 | return splitUnary(I&: FI, Split: [](IRBuilder<> &Builder, Value *Op, const Twine &Name) { |
1162 | return Builder.CreateFreeze(V: Op, Name); |
1163 | }); |
1164 | } |
1165 | |
1166 | // Delete the instructions that we scalarized. If a full vector result |
1167 | // is still needed, recreate it using InsertElements. |
bool ScalarizerVisitor::finish() {
  // The presence of data in Gathered or Scattered indicates changes
  // made to the Function.
  if (Gathered.empty() && Scattered.empty() && !Scalarized)
    return false;
  for (const auto &GMI : Gathered) {
    Instruction *Op = GMI.first;
    ValueVector &CV = *GMI.second;
    if (!Op->use_empty()) {
      // The value is still needed, so recreate it using a series of
      // insertelements and/or shufflevectors.
      Value *Res;
      if (auto *Ty = dyn_cast<FixedVectorType>(Val: Op->getType())) {
        BasicBlock *BB = Op->getParent();
        IRBuilder<> Builder(Op);
        // PHIs must stay grouped at the block head, so insert the
        // reconstruction code after them.
        if (isa<PHINode>(Val: Op))
          Builder.SetInsertPoint(TheBB: BB, IP: BB->getFirstInsertionPt());

        VectorSplit VS = *getVectorSplit(Ty);
        assert(VS.NumFragments == CV.size());

        Res = concatenate(Builder, Fragments: CV, VS, Name: Op->getName());

        Res->takeName(V: Op);
      } else {
        // Non-vector result: the single fragment IS the replacement value.
        assert(CV.size() == 1 && Op->getType() == CV[0]->getType());
        Res = CV[0];
        // Nothing to do if the "replacement" is the instruction itself.
        if (Op == Res)
          continue;
      }
      Op->replaceAllUsesWith(V: Res);
    }
    // The original instruction is now unused (or only used by dead code);
    // queue it for cleanup below.
    PotentiallyDeadInstrs.emplace_back(Args&: Op);
  }
  // Reset per-run state before deleting, so the visitor can be reused.
  Gathered.clear();
  Scattered.clear();
  Scalarized = false;

  RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts&: PotentiallyDeadInstrs);

  return true;
}
1210 | |
1211 | PreservedAnalyses ScalarizerPass::run(Function &F, FunctionAnalysisManager &AM) { |
1212 | DominatorTree *DT = &AM.getResult<DominatorTreeAnalysis>(IR&: F); |
1213 | ScalarizerVisitor Impl(DT, Options); |
1214 | bool Changed = Impl.visit(F); |
1215 | PreservedAnalyses PA; |
1216 | PA.preserve<DominatorTreeAnalysis>(); |
1217 | return Changed ? PA : PreservedAnalyses::all(); |
1218 | } |
1219 | |